R markdown for exam

This is the recording of R scripts.

setwd("~/Google Drive/My Drive/0_Iden2/KI-UT_doctoral_course/Data")
exp <- read.table("ExpressionTable.txt", header=TRUE, sep="\t", row.names = 1)
str(exp)
## 'data.frame':    29189 obs. of  138 variables:
##  $ NG1f0  : int  319 11 1217 34 36 3 61 3 15 69 ...
##  $ NG1h0  : int  591 17 1976 65 58 2 140 11 44 137 ...
##  $ NG2f0  : int  418 19 866 45 58 12 65 11 62 139 ...
##  $ NG2h0  : int  424 9 751 51 68 7 55 8 42 128 ...
##  $ NG3f0  : int  542 3 2854 99 44 16 100 7 53 98 ...
##  $ NG3h0  : int  1280 32 5270 152 126 21 205 13 77 370 ...
##  $ NG4f0  : int  326 18 684 46 55 10 39 13 40 25 ...
##  $ NG4h0  : int  349 23 822 21 46 19 39 15 29 20 ...
##  $ NK13f0 : int  266 4 1111 21 31 2 57 3 11 72 ...
##  $ NK13h0 : int  503 14 1849 72 55 5 122 7 33 112 ...
##  $ NK14f0 : int  174 1 1409 15 22 4 8 2 17 31 ...
##  $ NK14h0 : int  3183 79 18564 663 428 27 223 75 274 862 ...
##  $ NK15f0 : int  84 9 548 32 25 2 50 3 15 55 ...
##  $ NK15h0 : int  107 8 508 20 26 5 32 2 13 63 ...
##  $ NK22f0 : int  70 8 631 25 27 1 42 2 10 48 ...
##  $ NK22h0 : int  117 6 628 26 32 2 47 0 23 44 ...
##  $ NK25f0 : int  1982 33 11739 425 315 14 631 92 221 644 ...
##  $ NK25h0 : int  1697 88 8625 366 329 11 492 64 183 434 ...
##  $ NK26f0 : int  156 4 741 37 26 3 58 2 17 64 ...
##  $ NK26h0 : int  104 6 472 16 17 2 40 2 6 47 ...
##  $ NK27f0 : int  86 4 586 16 22 2 30 2 10 35 ...
##  $ NK27h0 : int  101 1 526 16 29 1 34 3 17 39 ...
##  $ NK29f0 : int  1548 53 13590 237 246 23 490 28 163 303 ...
##  $ NK29h0 : int  1194 31 5922 147 136 13 280 27 80 264 ...
##  $ NK2f0  : int  899 13 4236 189 139 24 229 28 143 308 ...
##  $ NK2h0  : int  657 33 2863 137 118 16 124 15 98 199 ...
##  $ NK35f0 : int  114 8 659 23 19 2 57 5 13 56 ...
##  $ NK35h0 : int  145 17 726 20 35 2 57 6 20 56 ...
##  $ NK37f0 : int  151 9 776 14 14 5 27 4 6 24 ...
##  $ NK37h0 : int  994 49 3289 95 262 70 190 35 126 232 ...
##  $ NK38f0 : int  131 7 654 36 28 1 54 7 18 66 ...
##  $ NK38h0 : int  64 5 275 13 15 2 27 2 6 20 ...
##  $ NK39f0 : int  1042 16 6362 250 156 9 332 27 145 320 ...
##  $ NK39h0 : int  2180 75 9627 357 385 92 605 64 295 435 ...
##  $ NK3f0  : int  1 0 3 0 2 0 0 0 0 3 ...
##  $ NK3h0  : int  8 0 6 0 2 0 1 0 3 0 ...
##  $ NK40f0 : int  925 20 5698 168 172 11 252 43 76 260 ...
##  $ NK40h0 : int  229 9 1477 40 31 2 87 6 27 57 ...
##  $ NK44f0 : int  68 2 444 13 22 1 31 1 15 27 ...
##  $ NK44h0 : int  44 2 260 11 9 1 8 3 11 16 ...
##  $ NK7f0  : int  84 3 599 22 28 2 39 0 8 43 ...
##  $ NK7h0  : int  61 0 330 16 14 1 19 4 10 23 ...
##  $ NK8f0  : int  209 11 682 27 32 3 56 2 13 47 ...
##  $ NK8h0  : int  410 15 1296 49 62 12 95 9 33 140 ...
##  $ NK9f0  : int  1846 42 10980 374 290 14 635 59 175 616 ...
##  $ NK9h0  : int  2787 106 13502 574 452 21 771 82 300 756 ...
##  $ NO101f0: int  3495 119 21544 475 466 47 952 83 268 682 ...
##  $ NO101f2: int  134 8 521 22 24 6 26 4 10 27 ...
##  $ NO101h0: int  3886 122 19839 580 625 44 1019 106 304 816 ...
##  $ NO101h2: int  75 6 252 13 10 3 11 0 6 16 ...
##  $ NO108f0: int  1411 51 11755 224 180 15 424 29 60 214 ...
##  $ NO108f2: int  166 5 593 21 21 2 41 4 14 26 ...
##  $ NO108h0: int  1947 70 11126 226 170 32 459 66 114 303 ...
##  $ NO108h2: int  235 6 610 36 29 2 30 5 19 84 ...
##  $ NO117f0: int  103 3 971 16 13 2 22 3 3 14 ...
##  $ NO117f2: int  127 4 564 17 10 3 32 3 10 21 ...
##  $ NO117h0: int  140 1 901 19 11 3 38 4 7 25 ...
##  $ NO117h2: int  244 3 742 25 50 1 34 12 24 73 ...
##  $ NO121f0: int  2146 71 11364 319 335 40 591 73 160 459 ...
##  $ NO121f2: int  1407 39 8913 304 176 14 156 36 201 367 ...
##  $ NO121h0: int  3146 112 16724 487 555 42 820 93 232 667 ...
##  $ NO121h2: int  221 5 1052 32 27 4 46 6 13 36 ...
##  $ NO125f0: int  875 6 4823 273 186 12 24 14 110 253 ...
##  $ NO125f2: int  133 4 469 14 22 3 41 2 8 36 ...
##  $ NO125h0: int  339 9 1946 89 62 4 12 2 33 55 ...
##  $ NO125h2: int  116 7 456 27 27 2 27 5 19 46 ...
##  $ NO131f0: int  1458 46 11759 205 208 27 406 48 60 238 ...
##  $ NO131f2: int  152 10 996 24 44 4 55 9 16 41 ...
##  $ NO131h0: int  2138 84 11892 232 194 39 440 46 116 306 ...
##  $ NO131h2: int  88 11 593 17 24 2 70 5 10 44 ...
##  $ NO135f0: int  1775 53 8948 250 288 22 456 67 127 383 ...
##  $ NO135f2: int  212 5 687 32 44 4 66 6 15 69 ...
##  $ NO135h0: int  2202 60 10663 335 359 27 541 69 166 486 ...
##  $ NO135h2: int  286 21 951 37 49 6 84 9 20 97 ...
##  $ NO146f0: int  97 6 679 19 20 3 52 3 6 45 ...
##  $ NO146f2: int  362 8 1904 46 50 5 106 7 35 75 ...
##  $ NO146h0: int  129 12 597 31 40 5 60 8 19 62 ...
##  $ NO146h2: int  860 16 2701 37 108 79 173 16 62 103 ...
##  $ NO150f0: int  1437 42 12393 219 181 29 430 32 76 204 ...
##  $ NO150f2: int  98 0 283 2 11 1 12 2 6 18 ...
##  $ NO150h0: int  1854 86 11884 199 199 45 412 61 112 283 ...
##  $ NO150h2: int  118 3 343 14 23 3 27 3 9 29 ...
##  $ NO22f0 : int  220 5 247 13 37 8 26 1 25 37 ...
##  $ NO22f2 : int  116 2 412 13 23 2 36 2 13 21 ...
##  $ NO22h0 : int  141 16 144 27 23 1 15 0 12 32 ...
##  $ NO22h2 : int  131 5 543 27 47 1 43 8 11 36 ...
##  $ NO27f0 : int  318 11 420 75 50 1 24 12 69 67 ...
##  $ NO27f2 : int  261 6 1066 19 44 4 55 3 15 85 ...
##  $ NO27h0 : int  301 35 357 56 60 3 24 6 71 69 ...
##  $ NO27h2 : int  261 13 1121 26 30 3 59 4 24 75 ...
##  $ NO32f0 : int  371 32 2570 133 55 23 94 11 101 109 ...
##  $ NO32f2 : int  226 15 1389 31 61 5 93 7 21 84 ...
##  $ NO32h0 : int  440 66 3294 125 72 18 147 29 77 90 ...
##  $ NO32h2 : int  223 19 1342 46 41 8 120 8 23 92 ...
##  $ NO37f0 : int  196 3 1028 0 32 8 70 1 35 32 ...
##  $ NO37f2 : int  7848 264 37123 1648 919 32 1764 158 568 1531 ...
##  $ NO37h0 : int  163 4 1153 0 23 0 42 4 33 10 ...
##  $ NO37h2 : int  5563 237 29208 988 508 32 1263 66 359 951 ...
##  $ NO40f0 : int  279 4 1593 32 38 12 46 0 36 48 ...
##   [list output truncated]
exp_norm <- apply(exp, 2, function(x) x/sum(x)*1000000)
head(exp_norm)
##                             NG1f0       NG1h0      NG2f0      NG2h0       NG3f0
## chr1:564464..564559,+  85.0370058  96.1221464 16.4873620 21.4247380 113.1779663
## chr1:564927..564932,+   2.9323105   2.7649348  0.7494255  0.4547704   0.6264463
## chr1:565020..565083,+ 324.4201756 321.3830139 34.1580275 37.9480619 595.9592543
## chr1:565114..565117,+   9.0635053  10.5718097  1.7749552  2.5770322  20.6727282
## chr1:565152..565156,+   9.5966527   9.4333071  2.2877201  3.4360429   9.1878792
## chr1:565315..565316,+   0.7997211   0.3252865  0.4733214  0.3537103   3.3410470
##                            NG3h0      NG4f0      NG4h0      NK13f0      NK13h0
## chr1:564464..564559,+ 160.812788 19.3453016 17.7881609  76.8106153  91.8602337
## chr1:564927..564932,+   4.020320  1.0681455  1.1722857   1.1550468   2.5567461
## chr1:565020..565083,+ 662.096401 40.5895285 41.8964708 320.8142618 337.6731054
## chr1:565114..565117,+  19.096519  2.7297051  1.0703478   6.0639959  13.1489798
## chr1:565152..565156,+  15.830009  3.2637779  2.3445714   8.9516131  10.0443595
## chr1:565315..565316,+   2.638335  0.5934142  0.9684099   0.5775234   0.9131236
##                             NK14f0     NK14h0      NK15f0     NK15h0
## chr1:564464..564559,+  152.5480789 133.167357  16.1100732 18.0544827
## chr1:564927..564932,+    0.8767131   3.305128   1.7260793  1.3498679
## chr1:565020..565083,+ 1235.2887542 776.663155 105.0990491 85.7166094
## chr1:565114..565117,+   13.1506965  27.737970   6.1371708  3.3746697
## chr1:565152..565156,+   19.2876881  17.906261   4.7946647  4.3870706
## chr1:565315..565316,+    3.5068524   1.129601   0.3835732  0.8436674
##                            NK22f0      NK22h0      NK25f0      NK25h0
## chr1:564464..564559,+  23.5188342  25.9203037 113.1373466 109.8879894
## chr1:564927..564932,+   2.6878668   1.3292463   1.8837197   5.6983754
## chr1:565020..565083,+ 212.0054913 139.1277840 670.0904699 558.5055441
## chr1:565114..565117,+   8.3995836   5.7600675  24.2600264  23.7000613
## chr1:565152..565156,+   9.0715503   7.0893138  17.9809607  21.3041535
## chr1:565315..565316,+   0.3359833   0.4430821   0.7991538   0.7122969
##                            NK26f0      NK26h0      NK27f0      NK27h0
## chr1:564464..564559,+  24.1738734  22.5222250  30.6120742  26.2861952
## chr1:564927..564932,+   0.6198429   1.2993591   1.4238174   0.2602594
## chr1:565020..565083,+ 114.8258985 102.2162519 208.5892497 136.8964228
## chr1:565114..565117,+   5.7335469   3.4649577   5.6952696   4.1641497
## chr1:565152..565156,+   4.0289789   3.6815175   7.8309957   7.5475214
## chr1:565315..565316,+   0.4648822   0.4331197   0.7119087   0.2602594
##                           NK29f0      NK29h0      NK2f0      NK2h0      NK35f0
## chr1:564464..564559,+  68.472270  83.3017405 166.444617 146.265846  18.7411873
## chr1:564927..564932,+   2.344335   2.1627755   2.406874   7.346686   1.3151710
## chr1:565020..565083,+ 601.122839 413.1598890 784.270743 637.380697 108.3372145
## chr1:565114..565117,+  10.483158  10.2557419  34.992250  30.499880   3.7811167
## chr1:565152..565156,+  10.881252   9.4883055  25.735041  26.269969   3.1235312
## chr1:565315..565316,+   1.017353   0.9069704   4.443460   3.562030   0.3287928
##                            NK35h0     NK37f0     NK37h0     NK38f0     NK38h0
## chr1:564464..564559,+  25.4211005  54.937539 177.875763 19.9262910 20.6741718
## chr1:564927..564932,+   2.9804049   3.274423   8.768524  1.0647636  1.6151697
## chr1:565020..565083,+ 127.2808206 282.328013 588.564772 99.4793458 88.8343320
## chr1:565114..565117,+   3.5063587   5.093547  17.000199  5.4759273  4.1994412
## chr1:565152..565156,+   6.1361277   5.093547  46.884758  4.2590546  4.8455090
## chr1:565315..565316,+   0.3506359   1.819124  12.526462  0.1521091  0.6460679
##                            NK39f0     NK39h0    NK3f0     NK3h0     NK40f0
## chr1:564464..564559,+ 113.8146769 124.594489  4.14776 29.145915  67.231020
## chr1:564927..564932,+   1.7476342   4.286508  0.00000  0.000000   1.453644
## chr1:565020..565083,+ 694.9030467 550.216123 12.44328 21.859437 414.143081
## chr1:565114..565117,+  27.3067843  20.403776  0.00000  0.000000  12.210607
## chr1:565152..565156,+  17.0394334  22.004073  8.29552  7.286479  12.501336
## chr1:565315..565316,+   0.9830442   5.258116  0.00000  0.000000   0.799504
##                            NK40h0      NK44f0      NK44h0       NK7f0
## chr1:564464..564559,+  37.5137339  30.9654621  23.0919126  29.9408988
## chr1:564927..564932,+   1.4743389   0.9107489   1.0496324   1.0693178
## chr1:565020..565083,+ 241.9553927 202.1862527 136.4522110 213.5071236
## chr1:565114..565117,+   6.5526173   5.9198678   5.7729782   7.8416640
## chr1:565152..565156,+   5.0782784  10.0182377   4.7233458   9.9802996
## chr1:565315..565316,+   0.3276309   0.4553744   0.5248162   0.7128785
##                             NK7h0       NK8f0      NK8h0      NK9f0       NK9h0
## chr1:564464..564559,+  24.6677337  52.2848740  98.023277 113.541399 112.5067410
## chr1:564927..564932,+   0.0000000   2.7518355   3.586217   2.583282   4.2790508
## chr1:565020..565083,+ 133.4483955 170.6137994 309.849188 675.343747 545.0541863
## chr1:565114..565117,+   6.4702252   6.7545053  11.714977  23.003512  23.1714637
## chr1:565152..565156,+   5.6614471   8.0053396  14.823032  17.836948  18.2465185
## chr1:565315..565316,+   0.4043891   0.7505006   2.868974   0.861094   0.8477365
##                          NO101f0    NO101f2    NO101h0   NO101h2     NO108f0
## chr1:564464..564559,+  92.990308  41.196937  90.293699 29.080645  85.3199447
## chr1:564927..564932,+   3.166194   2.459519   2.834748  2.326452   3.0838534
## chr1:565020..565083,+ 573.214077 160.176151 460.971873 97.710966 710.7979797
## chr1:565114..565117,+  12.638168   6.763676  13.476672  5.040645  13.5447680
## chr1:565152..565156,+  12.398708   7.378556  14.522275  3.877419  10.8841885
## chr1:565315..565316,+   1.250513   1.844639   1.022368  1.163226   0.9070157
##                           NO108f2    NO108h0    NO108h2     NO117f0    NO117f2
## chr1:564464..564559,+  51.8636062 106.696893 37.6186769  26.9517587  58.444251
## chr1:564927..564932,+   1.5621568   3.836046  0.9604769   0.7850027   1.840764
## chr1:565020..565083,+ 185.2717981 609.712190 97.6484805 254.0792010 259.547697
## chr1:565114..565117,+   6.5610586  12.384950  5.7628611   4.1866810   7.823246
## chr1:565152..565156,+   6.5610586   9.316113  4.6423048   3.4016783   4.601910
## chr1:565315..565316,+   0.6248627   1.753621  0.3201590   0.5233351   1.380573
##                           NO117h0     NO117h2    NO121f0    NO121f2    NO121h0
## chr1:564464..564559,+  40.7711454  46.5489487  94.812057 120.399758  93.824868
## chr1:564927..564932,+   0.2912225   0.5723231   3.136839   3.337307   3.340237
## chr1:565020..565083,+ 262.3914432 141.5545899 502.070932 762.702945 498.768944
## chr1:565114..565117,+   5.5332269   4.7693595  14.093684  26.013878  14.524066
## chr1:565152..565156,+   3.2034471   9.5387190  14.800577  15.060666  16.552067
## chr1:565315..565316,+   0.8736674   0.1907744   1.767233   1.198008   1.252589
##                          NO121h2     NO125f0    NO125f2     NO125h0     NO125h2
## chr1:564464..564559,+  85.072999  54.8659515  51.280727  33.3425525  41.1220069
## chr1:564927..564932,+   1.924728   0.3762237   1.542277   0.8852005   2.4815004
## chr1:565020..565083,+ 404.962874 302.4211247 180.832036 191.4000213 161.6520270
## chr1:565114..565117,+  12.318262  17.1181769   5.397971   8.7536495   9.5715016
## chr1:565152..565156,+  10.393534  11.6629337   8.482526   6.0980480   9.5715016
## chr1:565315..565316,+   1.539783   0.7524473   1.156708   0.3934224   0.7090001
##                          NO131f0    NO131f2    NO131h0     NO131h2    NO135f0
## chr1:564464..564559,+  91.204102  53.646668 110.413946  23.2833984  84.481794
## chr1:564927..564932,+   2.877496   3.529386   4.338060   2.9104248   2.522555
## chr1:565020..565083,+ 735.575474 351.526848 614.145297 156.8983550 425.883432
## chr1:565114..565117,+  12.823622   8.470526  11.981308   4.4979292  11.898844
## chr1:565152..565156,+  13.011285  15.529298  10.018852   6.3500177  13.707469
## chr1:565315..565316,+   1.688965   1.411754   2.014099   0.5291681   1.047098
##                          NO135f2    NO135h0    NO135h2     NO146f0     NO146f2
## chr1:564464..564559,+  64.568279  88.872540  70.715482  18.4570198  32.8677851
## chr1:564927..564932,+   1.522837   2.421595   5.192396   1.1416713   0.7263599
## chr1:565020..565083,+ 209.237771 430.357810 235.141342 129.1991385 172.8736541
## chr1:565114..565117,+   9.746155  13.520573   9.148506   3.6152925   4.1765694
## chr1:565152..565156,+  13.400964  14.489211  12.115590   3.8055711   4.5397493
## chr1:565315..565316,+   1.218269   1.089718   1.483542   0.5708357   0.4539749
##                          NO146h0    NO146h2    NO150f0     NO150f2    NO150h0
## chr1:564464..564559,+ 19.8809271  85.412965  84.974116  72.4338819  92.887684
## chr1:564927..564932,+  1.8493886   1.589078   2.483586   0.0000000   4.308706
## chr1:565020..565083,+ 92.0070813 268.256301 732.835229 209.1713120 595.403042
## chr1:565114..565117,+  4.7775871   3.674744  12.950126   1.4782425   9.970145
## chr1:565152..565156,+  6.1646286  10.726279  10.703072   8.1303337   9.970145
## chr1:565315..565316,+  0.7705786   7.846075   1.714857   0.7391212   2.254555
##                          NO150h2     NO22f0      NO22f2      NO22h0      NO22h2
## chr1:564464..564559,+  59.229752 13.4081925  50.2076910 12.43276278  40.3542798
## chr1:564927..564932,+   1.505841  0.3047316   0.8656498  1.41080996   1.5402397
## chr1:565020..565083,+ 172.167839 15.0537434 178.3238682 12.69728965 167.2700299
## chr1:565114..565117,+   7.027259  0.7923023   5.6267240  2.38074181   8.3172943
## chr1:565152..565156,+  11.544782  2.2550142   9.9549732  2.02803932  14.4782530
## chr1:565315..565316,+   1.505841  0.4875706   0.8656498  0.08817562   0.3080479
##                            NO27f0      NO27f2     NO27h0     NO27h2     NO32f0
## chr1:564464..564559,+ 27.88429980  48.5280115 27.3164385  35.452501  36.607313
## chr1:564927..564932,+  0.96455125   1.1155865  3.1763301   1.765833   3.157504
## chr1:565020..565083,+ 36.82832049 198.2025299 32.3985666 152.269171 253.587048
## chr1:565114..565117,+  6.57648580   3.5326905  5.0821281   3.531667  13.123376
## chr1:565152..565156,+  4.38432387   8.1809675  5.4451372   4.075000   5.426960
## chr1:565315..565316,+  0.08768648   0.7437243  0.2722569   0.407500   2.269456
##                           NO32f2     NO32h0     NO32h2      NO37f0       NO37f2
## chr1:564464..564559,+  55.931706  34.455092  27.999092  34.0869565  240.0003229
## chr1:564927..564932,+   3.712281   5.168264   2.385573   0.5217391    8.0734054
## chr1:565020..565083,+ 343.757254 257.943347 168.496778 178.7826087 1135.2614664
## chr1:565114..565117,+   7.672048   9.788378   5.775597   0.0000000   50.3976213
## chr1:565152..565156,+  15.096611   5.638106   5.147815   5.5652174   28.1040134
## chr1:565315..565316,+   1.237427   1.409526   1.004452   1.3913043    0.9785946
##                           NO37h0      NO37h2      NO40f0      NO40f2     NO40h0
## chr1:564464..564559,+  45.183470  193.115460  60.2516315  34.0782968  82.623436
## chr1:564927..564932,+   1.108797    8.227281   0.8638227   1.3364038   4.551875
## chr1:565020..565083,+ 319.610679 1013.934272 344.0173801 133.8631135 407.737692
## chr1:565114..565117,+   0.000000   34.297694   6.9105814   5.3456152   9.379622
## chr1:565152..565156,+   6.375582   17.634847   8.2063154  11.1366983   9.793429
## chr1:565315..565316,+   0.000000    1.110857   2.5914680   0.4454679   2.482841
##                           NO40h2    NO47f0      NO47f2     NO47h0      NO47h2
## chr1:564464..564559,+ 31.8895500 23.255418  73.6585095 13.7639659  63.4373990
## chr1:564927..564932,+  1.4981668  3.605491   0.6037583  1.4488385   1.0133770
## chr1:565020..565083,+ 80.2589346 28.843929 313.9543027 13.4346844 200.6486423
## chr1:565114..565117,+  4.4945003  3.515354   8.7544950  3.0952459   7.2963143
## chr1:565152..565156,+  7.2768101  6.039198  10.8676489  2.9635333  10.9444714
## chr1:565315..565316,+  0.4280477  1.442196   2.7169122  0.7902756   0.6080262
##                           NO48f0     NO48f2     NO48h0      NO48h2     NO50f0
## chr1:564464..564559,+ 10.2808841  30.874208 11.5356128  23.5728598  85.848666
## chr1:564927..564932,+  0.6209930   4.749878  0.6819081   2.1608455   2.528875
## chr1:565020..565083,+ 58.4423412 154.031764 52.2796246 140.2585157 635.147029
## chr1:565114..565117,+  1.9319782   7.124817  2.3866785   4.1252505  15.838746
## chr1:565152..565156,+  1.0349883   5.089155  2.0457244   4.7145720  15.705647
## chr1:565315..565316,+  0.4139953   1.017831  0.7387338   0.5893215   5.190850
##                            NO50f2     NO50h0      NO50h2     NO51f0     NO51f2
## chr1:564464..564559,+  58.7403985 120.765296  47.4187427 118.163382  96.892922
## chr1:564927..564932,+   1.9196209   4.378625   1.9411181   3.132041   1.524539
## chr1:565020..565083,+ 250.3185611 535.887173 150.5753058 699.394349 325.234982
## chr1:565114..565117,+   9.5981043  16.102039   6.1006570  14.900925   8.469661
## chr1:565152..565156,+   9.5981043  19.774434  11.3694061   9.680855  11.688132
## chr1:565315..565316,+   0.3839242   2.471804   0.5546052   3.416772   1.863325
##                           NO51h0     NO51h2     NO56f0      NO56f2      NO56h0
## chr1:564464..564559,+ 106.727657  72.094730  37.077257  94.2028304  29.7141724
## chr1:564927..564932,+   5.681320   3.425668   1.678066   0.6408356   2.1224409
## chr1:565020..565083,+ 755.209771 341.788192 131.688190 318.2816720 130.6059159
## chr1:565114..565117,+  15.319274  10.588427  10.388025   9.3989219   6.5947270
## chr1:565152..565156,+  13.391683  14.169807   4.394934   9.8261456   4.3206832
## chr1:565315..565316,+   3.449373   0.934273   4.315026   1.0680593   0.6822131
##                            NO56h2     NO68f0      NO68f2      NO68h0     NO68h2
## chr1:564464..564559,+  72.9479322 179.921438  48.2781357  200.341903  48.298057
## chr1:564927..564932,+   4.4264522   2.345345   0.2235099    4.579243   1.145408
## chr1:565020..565083,+ 327.0262882 722.254461 210.5463140 1159.693415 197.582959
## chr1:565114..565117,+  10.9776014  43.668083   6.2582769   34.760621  11.454085
## chr1:565152..565156,+  11.6858338  27.362354   6.9288065   16.443647   9.545070
## chr1:565315..565316,+   0.8852904   2.568711   0.8940396    1.248885   0.954507
##                           NO73f0      NO73f2     NO73h0      NO73h2     NO77f0
## chr1:564464..564559,+  88.353003  84.0010977 196.691114  70.0514045  96.454040
## chr1:564927..564932,+   1.827993   0.7601909   4.972528   0.9127219   2.810168
## chr1:565020..565083,+ 502.088787 225.3966106 843.672278 180.4907523 566.632000
## chr1:565114..565117,+  19.498594   3.0407637  42.818993   5.9326922  14.192766
## chr1:565152..565156,+  15.537942   6.0815274  45.581508  17.3417158  12.830261
## chr1:565315..565316,+   4.874648   1.1402864   7.182541   1.5972633   1.532819
##                           NO77f2     NO77h0     NO77h2     NO89f0      NO89f2
## chr1:564464..564559,+ 101.484273  92.191732  69.964272  88.732834  170.615163
## chr1:564927..564932,+   1.301080   2.898106   5.230974   3.348409    5.070287
## chr1:565020..565083,+ 321.180994 448.248068 325.846066 718.977783 1066.978557
## chr1:565114..565117,+   7.434745  12.915970  10.897862  12.897575   28.520365
## chr1:565152..565156,+  12.639067  14.034137  12.859477  12.029469   23.766971
## chr1:565315..565316,+   1.672818   1.163807   1.307743   1.364167    1.774601
##                           NO89h0      NO89h2
## chr1:564464..564559,+ 104.729658  51.9005943
## chr1:564927..564932,+   4.994407   0.8151926
## chr1:565020..565083,+ 613.241834 284.5022108
## chr1:565114..565117,+  10.039777   7.8801950
## chr1:565152..565156,+  10.192667  13.3148122
## chr1:565315..565316,+   2.140460   2.4455777
write.table(exp_norm, file = "ExpressionTable_norm.tsv", sep="\t", quote = FALSE)

column is not right, so i adjusted and added “TC” to column1,row1 manually.

To make PCA, merge cohort “newcond3” column.

# === Step 1: データ読み込み ===
# Expression matrix: 正しく row.names に"TC"列を指定
exp_norm <- read.table("ExpressionTable_norm.tsv", sep="\t", header=TRUE, row.names=1, check.names=FALSE, stringsAsFactors=FALSE)

# Cohort: Cond → newcond3 の対応表
cohort <- read.delim("Cohort.txt", header=TRUE, stringsAsFactors=FALSE)

# === Step 2: 置換用マッピングを作成 ===
label_map <- setNames(cohort$newcond3, cohort$Cond)

# === Step 3: 置換処理 ===
# 元の列名を取得(例:NG1f0, NG1h0, ...)
old_colnames <- colnames(exp_norm)

# 対応するものがあれば newcond3 に置換、なければそのまま
new_colnames <- ifelse(old_colnames %in% names(label_map), label_map[old_colnames], old_colnames)

# 列名を更新
colnames(exp_norm) <- new_colnames

# === Step 4: 正しく出力 ===
write.table(exp_norm, file="ExpressionTable_norm_renamed.tsv", sep="\t", quote=FALSE, row.names=TRUE, col.names=NA)

Do PCA analysis.

# === ライブラリ読み込み ===
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)

# === データ読み込み ===
exp <- read.table("ExpressionTable_norm_renamed.tsv", sep = "\t", header = TRUE, row.names = 1, check.names = FALSE)

# === PCA準備 ===
exp_t <- t(exp)
pca <- prcomp(exp_t, scale. = TRUE)

# === PCA結果データフレーム ===
pca_df <- as.data.frame(pca$x)
pca_df$Sample <- rownames(pca_df)

# === グループとタイプ(f/h)を抽出 ===
pca_df$Group <- sub("^(NO|OB|POB).*", "\\1", pca_df$Sample)
pca_df$Type <- ifelse(grepl("f", pca_df$Sample), "f",
                      ifelse(grepl("h", pca_df$Sample), "h", "other"))

# === 透明度の設定 ===
pca_df$Alpha <- ifelse(pca_df$Type == "f", 0.3,
                       ifelse(pca_df$Type == "h", 0.8, 1.0))

# === 色マップ設定 ===
color_map <- c("NO" = "dodgerblue", "OB" = "orange", "POB" = "forestgreen")
centroid_fill_map <- c("NO.f" = "dodgerblue", "NO.h" = "dodgerblue",
                       "OB.f" = "orange",    "OB.h" = "orange",
                       "POB.f" = "forestgreen", "POB.h" = "forestgreen")

# === f/hの重心を計算 ===
centroids <- pca_df %>%
  filter(Type %in% c("f", "h")) %>%
  group_by(Group, Type) %>%
  summarise(PC1 = mean(PC1), PC2 = mean(PC2), .groups = "drop") %>%
  mutate(ID = paste(Group, Type, sep = "."),
         Alpha = ifelse(Type == "f", 0.3, 0.8))

# === 矢印用データ:f と h の重心を結ぶペア ===
arrow_data <- centroids %>%
  select(Group, Type, PC1, PC2) %>%
  pivot_wider(names_from = Type, values_from = c(PC1, PC2)) %>%
  filter(!is.na(PC1_f) & !is.na(PC1_h)) %>%
  mutate(x_start = PC1_f, y_start = PC2_f,
         x_end = PC1_h, y_end = PC2_h)

# === プロット ===
p <- ggplot(pca_df, aes(x = PC1, y = PC2)) +
  geom_point(aes(color = Group, alpha = Alpha), size = 3) +
  scale_color_manual(values = color_map) +
  scale_alpha_identity() +
  # 四角い重心ポイント(f: 透明度低く, h: 高く)
  geom_point(data = centroids,
             aes(x = PC1, y = PC2, fill = ID, alpha = Alpha),
             shape = 22, size = 5, color = "black", stroke = 1.2, show.legend = FALSE) +
  scale_fill_manual(values = centroid_fill_map) +
  # 矢印追加
  geom_segment(data = arrow_data,
               aes(x = x_start, y = y_start, xend = x_end, yend = y_end),
               arrow = arrow(length = unit(0.25, "cm")),
               color = "black", size = 1) +
  # 軸・タイトルなど
  theme_minimal(base_size = 14) +
  labs(title = "PCA with Group Centroids and Arrows",
       x = paste0("PC1 (", round(summary(pca)$importance[2,1]*100, 1), "%)"),
       y = paste0("PC2 (", round(summary(pca)$importance[2,2]*100, 1), "%)")) +
  theme(legend.position = "right")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# === 描画 ===
print(p)

Prepare for the edgeR differential expression analysis. Replace with sample name.

# === Step 1: データ読み込み ===
# Expression matrix: 正しく row.names に"TC"列を指定
exp <- read.table("ExpressionTable.txt", sep="\t", header=TRUE, row.names=1, check.names=FALSE, stringsAsFactors=FALSE)

# Cohort: Cond → newcond3 の対応表
cohort <- read.delim("Cohort.txt", header=TRUE, stringsAsFactors=FALSE)

# === Step 2: 置換用マッピングを作成 ===
label_map <- setNames(cohort$newcond3, cohort$Cond)

# === Step 3: 置換処理 ===
# 元の列名を取得(例:NG1f0, NG1h0, ...)
old_colnames <- colnames(exp)

# 対応するものがあれば newcond3 に置換、なければそのまま
new_colnames <- ifelse(old_colnames %in% names(label_map), label_map[old_colnames], old_colnames)

# 列名を更新
colnames(exp) <- new_colnames

# === Step 4: 正しく出力 ===
write.table(exp, file="ExpressionTable_renamed.tsv", sep="\t", quote=FALSE, row.names=TRUE, col.names=NA)

edgeR

library(edgeR)
## Warning: package 'edgeR' was built under R version 4.4.2
## Loading required package: limma
## Warning: package 'limma' was built under R version 4.4.2
library(dplyr)

counts <- read.table("ExpressionTable.txt", sep="\t", header=TRUE, row.names=1, check.names=FALSE, stringsAsFactors=FALSE)
cohort <- read.table("Cohort.txt", header = TRUE, sep = "\t", stringsAsFactors = FALSE)

run_edgeR_comparison <- function(group_label, cohort, counts) {
  # サンプル選定
  samples <- cohort %>% filter(newcond3 %in% c(paste0(group_label, ".f"), paste0(group_label, ".h")))
  
  group <- factor(samples$newcond3, levels = c(paste0(group_label, ".f"), paste0(group_label, ".h")))
  colnames(counts) <- make.names(colnames(counts))  # Rの安全な名前に変換
  selected_counts <- counts[, make.names(samples$Cond)]

  # DGEListを作成
  dge <- DGEList(counts = selected_counts, group = group)
  keep <- filterByExpr(dge)
  dge <- dge[keep, , keep.lib.sizes = FALSE]
  dge <- calcNormFactors(dge)

  # モデル作成と検定
  design <- model.matrix(~group)
  dge <- estimateDisp(dge, design)
  fit <- glmQLFit(dge, design)
  qlf <- glmQLFTest(fit, coef = 2)

  # 結果出力
  result <- topTags(qlf, n = Inf)$table
  result$FDR <- p.adjust(result$PValue, method = "BH")
  result <- result[order(result$FDR), ]
  return(result)
}


# NO (Normal) 比較
res_NO <- run_edgeR_comparison("NO", cohort, counts)

# OB (Obese) 比較
res_OB <- run_edgeR_comparison("OB", cohort, counts)

# POB (Pre-Obese) 比較
res_POB <- run_edgeR_comparison("POB", cohort, counts)

# 保存
write.csv(res_NO, "edgeR_NO_h_vs_f.csv")
write.csv(res_OB, "edgeR_OB_h_vs_f.csv")
write.csv(res_POB, "edgeR_POB_h_vs_f.csv")

volcano plot

library(ggplot2)

plot_volcano <- function(res, title = "Volcano Plot", logFC_threshold = 1, FDR_threshold = 0.05) {
  res$gene <- rownames(res)
  res$Significant <- with(res, ifelse(FDR < FDR_threshold & abs(logFC) >= logFC_threshold,
                                      ifelse(logFC > 0, "Up", "Down"),
                                      "Not Significant"))
  
  ggplot(res, aes(x = logFC, y = -log10(FDR), color = Significant)) +
    geom_point(alpha = 0.7) +
    scale_color_manual(values = c("Up" = "red", "Down" = "blue", "Not Significant" = "grey")) +
    theme_minimal() +
    labs(title = title,
         x = "log2 Fold Change",
         y = "-log10 FDR") +
    geom_vline(xintercept = c(-logFC_threshold, logFC_threshold), linetype = "dashed", color = "black") +
    geom_hline(yintercept = -log10(FDR_threshold), linetype = "dashed", color = "black")
}

# NO群のvolcano plot
plot_volcano(res_NO, title = "NO.h vs NO.f")

# OB群
plot_volcano(res_OB, title = "OB.h vs OB.f")

# POB群
plot_volcano(res_POB, title = "POB.h vs POB.f")

edgeR using design matrix (better way than comparing h vs f for each)

library(edgeR)
library(dplyr)

# データ読み込み
counts <- read.table("ExpressionTable.txt", sep="\t", header=TRUE, row.names=1, check.names=FALSE, stringsAsFactors=FALSE)
cohort <- read.table("Cohort.txt", header = TRUE, sep = "\t", stringsAsFactors = FALSE)

# 一致するサンプルのみ抽出
common_samples <- intersect(colnames(counts), cohort$Cond)
counts <- counts[, common_samples]
cohort <- cohort[cohort$Cond %in% common_samples, ]

# group(NO.f, NO.h, OB.f, OB.h, POB.f, POB.h など)を因子として定義
group <- factor(cohort$newcond3)

# デザインマトリクスの作成
design <- model.matrix(~ 0 + group)  # インターセプトなしで各群を列に
colnames(design) <- levels(group)   # 列名を分かりやすく

dge <- DGEList(counts = counts, group = group)

keep <- filterByExpr(dge, design)
dge <- dge[keep, , keep.lib.sizes = FALSE]
dge <- calcNormFactors(dge)

dge <- estimateDisp(dge, design)
fit <- glmQLFit(dge, design)

# 比較したいペア(h vs f)
contrast_NO  <- makeContrasts(NO.h - NO.f, levels = design)
contrast_OB  <- makeContrasts(OB.h - OB.f, levels = design)
contrast_POB <- makeContrasts(POB.h - POB.f, levels = design)


qlf_NO  <- glmQLFTest(fit, contrast = contrast_NO)
qlf_OB  <- glmQLFTest(fit, contrast = contrast_OB)
qlf_POB <- glmQLFTest(fit, contrast = contrast_POB)

# 結果取得
res_NO  <- topTags(qlf_NO, n = Inf)$table
res_OB  <- topTags(qlf_OB, n = Inf)$table
res_POB <- topTags(qlf_POB, n = Inf)$table

# 保存
write.csv(res_NO, "edgeR_NO_h_vs_f_2.csv")
write.csv(res_OB, "edgeR_OB_h_vs_f_2.csv")
write.csv(res_POB, "edgeR_POB_h_vs_f_2.csv")

# plot_volcano function that i defined earlier
plot_volcano(res_NO, title = "NO.h vs NO.f")

plot_volcano(res_OB, title = "OB.h vs OB.f")

plot_volcano(res_POB, title = "POB.h vs POB.f")

PCA against DEG only prep for PCA

# 正規化済みの発現データ
norm_expr <- read.table("ExpressionTable_norm_renamed.tsv", sep="\t", header=TRUE, row.names=1, check.names=FALSE)

# DEG結果(例: res_NO)
deg_res <- read.csv("edgeR_NO_h_vs_f.csv", row.names = 1)
deg_genes <- rownames(deg_res[deg_res$FDR < 0.05, ])
deg_expr <- norm_expr[rownames(norm_expr) %in% deg_genes, ]
write.table(deg_expr, "ExpressionTable_norm_renamed_DEG_NO.tsv", sep="\t", quote=FALSE, col.names=NA)

deg_res <- read.csv("edgeR_OB_h_vs_f.csv", row.names = 1)
deg_genes <- rownames(deg_res[deg_res$FDR < 0.05, ])
deg_expr <- norm_expr[rownames(norm_expr) %in% deg_genes, ]
write.table(deg_expr, "ExpressionTable_norm_renamed_DEG_OB.tsv", sep="\t", quote=FALSE, col.names=NA)

deg_res <- read.csv("edgeR_POB_h_vs_f.csv", row.names = 1)
deg_genes <- rownames(deg_res[deg_res$FDR < 0.05, ])
deg_expr <- norm_expr[rownames(norm_expr) %in% deg_genes, ]
write.table(deg_expr, "ExpressionTable_norm_renamed_DEG_POB.tsv", sep="\t", quote=FALSE, col.names=NA)

# 全てのDEG
deg_NO  <- rownames(res_NO[res_NO$FDR < 0.05, ])
deg_OB  <- rownames(res_OB[res_OB$FDR < 0.05, ])
deg_POB <- rownames(res_POB[res_POB$FDR < 0.05, ])
# 全てのDEGを結合してユニークに(重複を除く)
deg_all <- unique(c(deg_NO, deg_OB, deg_POB))
deg_expr <- norm_expr[rownames(norm_expr) %in% deg_all, ]
write.table(deg_expr, "ExpressionTable_norm_renamed_DEG.tsv", sep="\t", quote=FALSE, col.names=NA)

PCA using the function i defined earlier NO

# === ライブラリ読み込み ===
library(ggplot2)
library(dplyr)
library(tidyr)

# === データ読み込み ===
exp <- read.table("ExpressionTable_norm_renamed_DEG_NO.tsv", sep = "\t", header = TRUE, row.names = 1, check.names = FALSE)

# === PCA準備 ===
exp_t <- t(exp)
pca <- prcomp(exp_t, scale. = TRUE)

# === PCA結果データフレーム ===
pca_df <- as.data.frame(pca$x)
pca_df$Sample <- rownames(pca_df)

# === グループとタイプ(f/h)を抽出 ===
pca_df$Group <- sub("^(NO|OB|POB).*", "\\1", pca_df$Sample)
pca_df$Type <- ifelse(grepl("f", pca_df$Sample), "f",
                      ifelse(grepl("h", pca_df$Sample), "h", "other"))

# === 透明度の設定 ===
pca_df$Alpha <- ifelse(pca_df$Type == "f", 0.3,
                       ifelse(pca_df$Type == "h", 0.8, 1.0))

# === 色マップ設定 ===
color_map <- c("NO" = "dodgerblue", "OB" = "orange", "POB" = "forestgreen")
centroid_fill_map <- c("NO.f" = "dodgerblue", "NO.h" = "dodgerblue",
                       "OB.f" = "orange",    "OB.h" = "orange",
                       "POB.f" = "forestgreen", "POB.h" = "forestgreen")

# === f/hの重心を計算 ===
centroids <- pca_df %>%
  filter(Type %in% c("f", "h")) %>%
  group_by(Group, Type) %>%
  summarise(PC1 = mean(PC1), PC2 = mean(PC2), .groups = "drop") %>%
  mutate(ID = paste(Group, Type, sep = "."),
         Alpha = ifelse(Type == "f", 0.3, 0.8))

# === 矢印用データ:f と h の重心を結ぶペア ===
arrow_data <- centroids %>%
  select(Group, Type, PC1, PC2) %>%
  pivot_wider(names_from = Type, values_from = c(PC1, PC2)) %>%
  filter(!is.na(PC1_f) & !is.na(PC1_h)) %>%
  mutate(x_start = PC1_f, y_start = PC2_f,
         x_end = PC1_h, y_end = PC2_h)

# === プロット ===
p <- ggplot(pca_df, aes(x = PC1, y = PC2)) +
  geom_point(aes(color = Group, alpha = Alpha), size = 3) +
  scale_color_manual(values = color_map) +
  scale_alpha_identity() +
  # 四角い重心ポイント(f: 透明度低く, h: 高く)
  geom_point(data = centroids,
             aes(x = PC1, y = PC2, fill = ID, alpha = Alpha),
             shape = 22, size = 5, color = "black", stroke = 1.2, show.legend = FALSE) +
  scale_fill_manual(values = centroid_fill_map) +
  # 矢印追加
  geom_segment(data = arrow_data,
               aes(x = x_start, y = y_start, xend = x_end, yend = y_end),
               arrow = arrow(length = unit(0.25, "cm")),
               color = "black", size = 1) +
  # 軸・タイトルなど
  theme_minimal(base_size = 14) +
  labs(title = "PCA with Group Centroids and Arrows",
       x = paste0("PC1 (", round(summary(pca)$importance[2,1]*100, 1), "%)"),
       y = paste0("PC2 (", round(summary(pca)$importance[2,2]*100, 1), "%)")) +
  theme(legend.position = "right")

# === 描画 ===
print(p)

OB

# === ライブラリ読み込み ===
library(ggplot2)
library(dplyr)
library(tidyr)

# === データ読み込み ===
exp <- read.table("ExpressionTable_norm_renamed_DEG_OB.tsv", sep = "\t", header = TRUE, row.names = 1, check.names = FALSE)

# === PCA準備 ===
exp_t <- t(exp)
pca <- prcomp(exp_t, scale. = TRUE)

# === PCA結果データフレーム ===
pca_df <- as.data.frame(pca$x)
pca_df$Sample <- rownames(pca_df)

# === グループとタイプ(f/h)を抽出 ===
pca_df$Group <- sub("^(NO|OB|POB).*", "\\1", pca_df$Sample)
pca_df$Type <- ifelse(grepl("f", pca_df$Sample), "f",
                      ifelse(grepl("h", pca_df$Sample), "h", "other"))

# === 透明度の設定 ===
pca_df$Alpha <- ifelse(pca_df$Type == "f", 0.3,
                       ifelse(pca_df$Type == "h", 0.8, 1.0))

# === 色マップ設定 ===
color_map <- c("NO" = "dodgerblue", "OB" = "orange", "POB" = "forestgreen")
centroid_fill_map <- c("NO.f" = "dodgerblue", "NO.h" = "dodgerblue",
                       "OB.f" = "orange",    "OB.h" = "orange",
                       "POB.f" = "forestgreen", "POB.h" = "forestgreen")

# === f/hの重心を計算 ===
centroids <- pca_df %>%
  filter(Type %in% c("f", "h")) %>%
  group_by(Group, Type) %>%
  summarise(PC1 = mean(PC1), PC2 = mean(PC2), .groups = "drop") %>%
  mutate(ID = paste(Group, Type, sep = "."),
         Alpha = ifelse(Type == "f", 0.3, 0.8))

# === 矢印用データ:f と h の重心を結ぶペア ===
arrow_data <- centroids %>%
  select(Group, Type, PC1, PC2) %>%
  pivot_wider(names_from = Type, values_from = c(PC1, PC2)) %>%
  filter(!is.na(PC1_f) & !is.na(PC1_h)) %>%
  mutate(x_start = PC1_f, y_start = PC2_f,
         x_end = PC1_h, y_end = PC2_h)

# === プロット ===
p <- ggplot(pca_df, aes(x = PC1, y = PC2)) +
  geom_point(aes(color = Group, alpha = Alpha), size = 3) +
  scale_color_manual(values = color_map) +
  scale_alpha_identity() +
  # 四角い重心ポイント(f: 透明度低く, h: 高く)
  geom_point(data = centroids,
             aes(x = PC1, y = PC2, fill = ID, alpha = Alpha),
             shape = 22, size = 5, color = "black", stroke = 1.2, show.legend = FALSE) +
  scale_fill_manual(values = centroid_fill_map) +
  # 矢印追加
  geom_segment(data = arrow_data,
               aes(x = x_start, y = y_start, xend = x_end, yend = y_end),
               arrow = arrow(length = unit(0.25, "cm")),
               color = "black", size = 1) +
  # 軸・タイトルなど
  theme_minimal(base_size = 14) +
  labs(title = "PCA with Group Centroids and Arrows",
       x = paste0("PC1 (", round(summary(pca)$importance[2,1]*100, 1), "%)"),
       y = paste0("PC2 (", round(summary(pca)$importance[2,2]*100, 1), "%)")) +
  theme(legend.position = "right")

# === 描画 ===
print(p)

POB

# === ライブラリ読み込み ===
library(ggplot2)
library(dplyr)
library(tidyr)

# === データ読み込み ===
exp <- read.table("ExpressionTable_norm_renamed_DEG_POB.tsv", sep = "\t", header = TRUE, row.names = 1, check.names = FALSE)

# === PCA準備 ===
exp_t <- t(exp)
pca <- prcomp(exp_t, scale. = TRUE)

# === PCA結果データフレーム ===
pca_df <- as.data.frame(pca$x)
pca_df$Sample <- rownames(pca_df)

# === グループとタイプ(f/h)を抽出 ===
pca_df$Group <- sub("^(NO|OB|POB).*", "\\1", pca_df$Sample)
pca_df$Type <- ifelse(grepl("f", pca_df$Sample), "f",
                      ifelse(grepl("h", pca_df$Sample), "h", "other"))

# === 透明度の設定 ===
pca_df$Alpha <- ifelse(pca_df$Type == "f", 0.3,
                       ifelse(pca_df$Type == "h", 0.8, 1.0))

# === 色マップ設定 ===
color_map <- c("NO" = "dodgerblue", "OB" = "orange", "POB" = "forestgreen")
centroid_fill_map <- c("NO.f" = "dodgerblue", "NO.h" = "dodgerblue",
                       "OB.f" = "orange",    "OB.h" = "orange",
                       "POB.f" = "forestgreen", "POB.h" = "forestgreen")

# === f/hの重心を計算 ===
centroids <- pca_df %>%
  filter(Type %in% c("f", "h")) %>%
  group_by(Group, Type) %>%
  summarise(PC1 = mean(PC1), PC2 = mean(PC2), .groups = "drop") %>%
  mutate(ID = paste(Group, Type, sep = "."),
         Alpha = ifelse(Type == "f", 0.3, 0.8))

# === 矢印用データ:f と h の重心を結ぶペア ===
arrow_data <- centroids %>%
  select(Group, Type, PC1, PC2) %>%
  pivot_wider(names_from = Type, values_from = c(PC1, PC2)) %>%
  filter(!is.na(PC1_f) & !is.na(PC1_h)) %>%
  mutate(x_start = PC1_f, y_start = PC2_f,
         x_end = PC1_h, y_end = PC2_h)

# === プロット ===
p <- ggplot(pca_df, aes(x = PC1, y = PC2)) +
  geom_point(aes(color = Group, alpha = Alpha), size = 3) +
  scale_color_manual(values = color_map) +
  scale_alpha_identity() +
  # 四角い重心ポイント(f: 透明度低く, h: 高く)
  geom_point(data = centroids,
             aes(x = PC1, y = PC2, fill = ID, alpha = Alpha),
             shape = 22, size = 5, color = "black", stroke = 1.2, show.legend = FALSE) +
  scale_fill_manual(values = centroid_fill_map) +
  # 矢印追加
  geom_segment(data = arrow_data,
               aes(x = x_start, y = y_start, xend = x_end, yend = y_end),
               arrow = arrow(length = unit(0.25, "cm")),
               color = "black", size = 1) +
  # 軸・タイトルなど
  theme_minimal(base_size = 14) +
  labs(title = "PCA with Group Centroids and Arrows",
       x = paste0("PC1 (", round(summary(pca)$importance[2,1]*100, 1), "%)"),
       y = paste0("PC2 (", round(summary(pca)$importance[2,2]*100, 1), "%)")) +
  theme(legend.position = "right")

# === 描画 ===
print(p)

PCA using DEGs in at least one comparison

# === ライブラリ読み込み ===
library(ggplot2)
library(dplyr)
library(tidyr)

# === データ読み込み ===
exp <- read.table("ExpressionTable_norm_renamed_DEG.tsv", sep = "\t", header = TRUE, row.names = 1, check.names = FALSE)

# === PCA準備 ===
exp_t <- t(exp)
pca <- prcomp(exp_t, scale. = TRUE)

# === PCA結果データフレーム ===
pca_df <- as.data.frame(pca$x)
pca_df$Sample <- rownames(pca_df)

# === グループとタイプ(f/h)を抽出 ===
pca_df$Group <- sub("^(NO|OB|POB).*", "\\1", pca_df$Sample)
pca_df$Type <- ifelse(grepl("f", pca_df$Sample), "f",
                      ifelse(grepl("h", pca_df$Sample), "h", "other"))

# === 透明度の設定 ===
pca_df$Alpha <- ifelse(pca_df$Type == "f", 0.3,
                       ifelse(pca_df$Type == "h", 0.8, 1.0))

# === 色マップ設定 ===
color_map <- c("NO" = "dodgerblue", "OB" = "orange", "POB" = "forestgreen")
centroid_fill_map <- c("NO.f" = "dodgerblue", "NO.h" = "dodgerblue",
                       "OB.f" = "orange",    "OB.h" = "orange",
                       "POB.f" = "forestgreen", "POB.h" = "forestgreen")

# === f/hの重心を計算 ===
centroids <- pca_df %>%
  filter(Type %in% c("f", "h")) %>%
  group_by(Group, Type) %>%
  summarise(PC1 = mean(PC1), PC2 = mean(PC2), .groups = "drop") %>%
  mutate(ID = paste(Group, Type, sep = "."),
         Alpha = ifelse(Type == "f", 0.3, 0.8))

# === 矢印用データ:f と h の重心を結ぶペア ===
arrow_data <- centroids %>%
  select(Group, Type, PC1, PC2) %>%
  pivot_wider(names_from = Type, values_from = c(PC1, PC2)) %>%
  filter(!is.na(PC1_f) & !is.na(PC1_h)) %>%
  mutate(x_start = PC1_f, y_start = PC2_f,
         x_end = PC1_h, y_end = PC2_h)

# === プロット ===
p <- ggplot(pca_df, aes(x = PC1, y = PC2)) +
  geom_point(aes(color = Group, alpha = Alpha), size = 3) +
  scale_color_manual(values = color_map) +
  scale_alpha_identity() +
  # 四角い重心ポイント(f: 透明度低く, h: 高く)
  geom_point(data = centroids,
             aes(x = PC1, y = PC2, fill = ID, alpha = Alpha),
             shape = 22, size = 5, color = "black", stroke = 1.2, show.legend = FALSE) +
  scale_fill_manual(values = centroid_fill_map) +
  # 矢印追加
  geom_segment(data = arrow_data,
               aes(x = x_start, y = y_start, xend = x_end, yend = y_end),
               arrow = arrow(length = unit(0.25, "cm")),
               color = "black", size = 1) +
  # 軸・タイトルなど
  theme_minimal(base_size = 14) +
  labs(title = "PCA with Group Centroids and Arrows",
       x = paste0("PC1 (", round(summary(pca)$importance[2,1]*100, 1), "%)"),
       y = paste0("PC2 (", round(summary(pca)$importance[2,2]*100, 1), "%)")) +
  theme(legend.position = "right")

# === 描画 ===
print(p)

PCA for Denmark or KI (check for batch effect)

# 発現データの読み込み
expr <- read.table("ExpressionTable_norm.tsv", sep = "\t", header = TRUE, row.names = 1, check.names = FALSE)

# 臨床情報の読み込み
clinical <- read.table("clinical_data.txt", sep = "\t", header = TRUE, stringsAsFactors = FALSE)

# 元の列名を取得
original_colnames <- colnames(expr)

# ベース名(末尾のf0, h0, f2, h2などを除く)を抽出
# 正規表現で末尾のf0, h0, f2, h2 を除去
base_names <- gsub("f[0-9]$|h[0-9]$", "", original_colnames)

# 新しい列名を生成
new_colnames <- mapply(function(orig, base) {
  where <- clinical$Where[clinical$ID == base]
  if (length(where) == 1) {
    paste0(orig, "_", where)
  } else {
    orig  # 対応が見つからない場合はそのまま
  }
}, orig = original_colnames, base = base_names)

# 列名を置き換え
colnames(expr) <- new_colnames

# 保存
write.table(expr, "ExpressionTable_norm_place.tsv", sep = "\t", quote = FALSE, col.names = NA)

-> There are so many samples missing in the clinical_data.txt, so i gave up.

Merge to genes at last.(didn’t need to do this)

exp_norm <- read.table("ExpressionTable_norm.tsv", sep="\t", header=TRUE, stringsAsFactors=FALSE) # don't use row.names=1 because i will use the first column to merge.
annot <- read.table("Annotation.txt", header=TRUE, sep="\t", stringsAsFactors=FALSE)
exp_norm_ids <- exp_norm[[1]]
exp_norm$TC <- exp_norm_ids
merged <- merge(annot[, c("TC", "gene")], exp_norm, by = "TC", all.y = TRUE)
head(merged)
##   TC gene NG1f0      NG1h0    NG2f0     NG2h0     NG3f0    NG3h0     NG4f0
## 1  0 <NA>     0  0.4879297 1.656625  1.212721 2.2969698 2.889605 2.3143152
## 2  0 <NA>     0  0.1626432 2.761041  2.071732 3.5498624 2.763970 1.6615596
## 3  0 <NA>     0  3.9034374 0.788869  1.313781 0.0000000 1.130715 0.0000000
## 4  0 <NA>     0 18.8666142 5.088205 11.520850 0.6264463 6.281750 1.7802425
## 5  0 <NA>     0  0.1626432 2.051059  1.111661 0.2088154 1.633255 0.2967071
## 6  0 <NA>     0  0.3252865 1.735512  1.010601 1.4617081 0.502540 1.8395839
##      NG4h0    NK13f0     NK13h0   NK14f0     NK14h0    NK15f0    NK15h0
## 1 2.905230 0.0000000  0.0000000 0.000000  0.5438818 0.5753598 1.1811344
## 2 1.885851 0.0000000  0.0000000 0.000000  0.1255112 0.7671463 0.0000000
## 3 1.070348 0.0000000  5.2961169 0.000000  3.5143129 0.1917866 0.8436674
## 4 7.492435 0.5775234 22.2802157 0.000000 18.1154464 0.7671463 4.3870706
## 5 2.038758 0.0000000  0.7304989 0.000000  0.2928594 0.3835732 0.5062004
## 6 1.325193 0.0000000  0.5478742 2.630139  0.5020447 0.0000000 0.6749339
##      NK22f0     NK22h0     NK25f0    NK25h0    NK26f0    NK26h0    NK27f0
## 1 0.0000000  0.0000000 0.57082415 1.1655768 0.6198429 0.4331197 0.0000000
## 2 0.0000000  0.2215411 0.22832966 0.3885256 0.6198429 0.6496796 0.3559544
## 3 0.0000000  1.9938695 0.51374174 1.8131194 0.3099215 1.2993591 0.0000000
## 4 1.0079500 27.4710911 0.45665932 7.3172321 0.3099215 5.1974365 1.4238174
## 5 0.0000000  0.0000000 0.05708242 0.5180341 0.1549607 0.6496796 0.0000000
## 6 0.3359833  0.6646232 0.45665932 2.5901706 0.3099215 0.6496796 0.7119087
##       NK27h0    NK29f0     NK29h0     NK2f0     NK2h0    NK35f0    NK35h0
## 1  0.2602594 3.7597823  1.6744068 0.0000000 0.6678806 1.3151710 1.2272255
## 2  0.0000000 2.9635931  1.2558051 0.0000000 0.4452537 0.6575855 0.7012717
## 3  3.3833717 1.4154475  2.8604450 1.1108651 2.2262686 0.0000000 0.1753179
## 4 31.7516418 9.2004084 12.6975852 0.0000000 8.4598206 0.6575855 3.6816766
## 5  0.0000000 2.3443348  0.7674365 0.0000000 0.4452537 0.9863783 0.0000000
## 6  0.0000000 0.7961892  0.3488348 0.3702884 1.7810149 0.4931891 0.8765897
##      NK37f0    NK37h0    NK38f0    NK38h0    NK39f0    NK39h0   NK3f0     NK3h0
## 1 1.4552990 0.0000000 0.4563273 0.9691018 0.2184543 0.5143809 4.14776  3.643239
## 2 0.7276495 0.0000000 0.6084364 0.6460679 0.1092271 0.3429206 4.14776 18.216197
## 3 0.0000000 0.0000000 0.1521091 0.6460679 0.7645900 1.7146031 0.00000  0.000000
## 4 2.5467733 2.8631913 0.3042182 5.1685430 0.1092271 6.8012588 4.14776 18.216197
## 5 0.3638248 0.0000000 0.3042182 0.3230339 0.0000000 0.2857672 0.00000  3.643239
## 6 0.7276495 0.5368484 0.3042182 0.9691018 0.4369085 2.2861374 0.00000  3.643239
##      NK40f0    NK40h0    NK44f0     NK44h0     NK7f0     NK7h0     NK8f0
## 1 5.7418925 5.5697247 0.0000000  0.0000000 0.0000000  0.000000 0.7505006
## 2 1.7443724 3.1124932 0.0000000  0.0000000 0.0000000  0.000000 0.5003337
## 3 1.3082793 1.1467080 0.0000000  5.2481620 0.0000000  3.235113 0.0000000
## 4 2.8346052 5.8973555 0.9107489 27.8152584 0.7128785 23.858956 0.5003337
## 5 0.2907287 0.8190772 0.0000000  0.0000000 0.0000000  0.000000 0.0000000
## 6 1.5990080 1.1467080 1.3661233  0.5248162 0.3564393  0.000000 0.5003337
##       NK8h0      NK9f0     NK9h0  NO101f0   NO101f2   NO101h0   NO101h2
## 1 0.0000000 0.79958731 1.2110521 1.623007 0.9223195  2.207386  0.000000
## 2 1.4344870 0.00000000 0.3229472 2.713880 0.9223195  2.881219  0.000000
## 3 1.1954058 0.43054702 1.9376834 1.223907 0.0000000  2.672099  5.428387
## 4 8.3678407 0.49205373 7.0644706 8.673774 1.5371991 18.007621 13.570967
## 5 0.2390812 0.06150672 0.5651577 1.223907 0.0000000  1.394138  0.000000
## 6 1.1954058 0.43054702 2.3817358 1.037660 1.5371991  1.022368  0.000000
##     NO108f0   NO108f2   NO108h0   NO108h2   NO117f0  NO117f2  NO117h0
## 1 3.8699337 0.3124314 4.3292525 0.4802384 4.1866810 0.460191 3.203447
## 2 4.9583526 0.9372941 2.3564286 0.9604769 4.1866810 0.000000 3.203447
## 3 0.1209354 0.0000000 1.2604153 0.1600795 0.0000000 0.000000 0.000000
## 4 1.3907574 0.9372941 9.3709136 1.4407153 0.7850027 0.000000 2.038557
## 5 1.2698220 0.0000000 1.9180232 0.0000000 2.0933405 0.000000 1.456112
## 6 0.7860803 0.0000000 0.3836046 0.9604769 1.0466702 0.460191 1.747335
##      NO117h2    NO121f0     NO121f2    NO121h0   NO121h2   NO125f0   NO125f2
## 1  0.0000000  1.5021482  0.59900377  2.3560599  0.000000 0.6270394 0.0000000
## 2  0.0000000  4.2413595  0.34228787  4.1156490  0.000000 0.8151513 0.0000000
## 3  2.0985182  1.9881373  3.16616279  2.8928837  5.004294 0.0000000 0.0000000
## 4 12.9726578 17.0096189 16.00195789 16.5818903 30.025764 1.2540789 1.9278469
## 5  0.0000000  2.4299456  0.08557197  2.5051777  1.154837 0.1254079 0.3855694
## 6  0.3815488  0.7952549  0.34228787  0.8052357  1.539783 2.3200459 0.3855694
##      NO125h0   NO125h2    NO131f0   NO131f2   NO131h0   NO131h2   NO135f0
## 1  1.3769786 0.0000000 3.87836375 0.0000000 4.9061389 1.0583363  1.713434
## 2  0.1967112 0.0000000 5.50477436 0.0000000 2.9953269 0.7937522  3.426867
## 3  1.8687566 1.4180002 0.06255425 0.0000000 0.7230099 1.0583363  1.761029
## 4 14.1632082 3.5450006 1.56385635 0.3529386 9.1409113 8.2021062 12.612775
## 5  0.7868449 0.0000000 1.43874784 0.0000000 2.2723169 0.2645841  1.856220
## 6  0.8852005 0.3545001 0.81320530 1.7646930 0.4131485 0.0000000  1.237480
##     NO135f2    NO135h0    NO135h2   NO146f0   NO146f2   NO146h0   NO146h2
## 1 0.0000000  1.9372761  0.2472569 3.0444569 0.9079499 1.6952729 0.4965870
## 2 0.0000000  3.4305931  0.7417708 0.3805571 0.7263599 0.3082314 0.7945392
## 3 0.0000000  2.4215951  3.7088540 0.0000000 0.5447699 0.4623471 0.0000000
## 4 1.2182694 15.4174888 18.7915268 0.5708357 1.8158997 3.6987771 2.4829350
## 5 0.9137021  1.8161963  0.2472569 0.3805571 0.4539749 0.3082314 0.0993174
## 6 0.0000000  0.8071984  0.4945139 0.3805571 0.5447699 0.3082314 0.5959044
##    NO150f0   NO150f2   NO150h0   NO150h2    NO22f0    NO22f2    NO22h0
## 1 4.848906 0.0000000 4.3087060 0.0000000 4.5709747 0.0000000 2.8216199
## 2 4.434975 1.4782425 3.8577949 0.5019471 3.2911018 0.0000000 2.2925662
## 3 0.177399 0.0000000 1.3527333 0.0000000 0.0000000 0.0000000 0.0000000
## 4 1.892256 1.4782425 6.3127552 2.0077882 1.2798729 1.2984748 1.5871612
## 5 1.241793 0.7391212 1.9539480 0.5019471 0.6094633 0.0000000 0.8817562
## 6 1.005261 0.7391212 0.3507086 0.5019471 0.7923023 0.4328249 2.5570931
##      NO22h2    NO27f0    NO27f2    NO27h0    NO27h2   NO32f0    NO32f2
## 1 0.0000000 3.6828320 0.7437243 2.9040732 0.2716667 3.552192 0.0000000
## 2 0.3080479 3.6828320 0.0000000 3.2670823 0.4075000 3.650864 0.4949708
## 3 0.6160959 0.1753730 0.3718622 0.0000000 0.4075000 0.000000 0.0000000
## 4 4.9287670 1.0522377 2.2311729 2.1780549 2.8525001 1.776096 1.7323980
## 5 0.3080479 1.0522377 0.3718622 1.0890274 0.4075000 2.269456 0.0000000
## 6 0.0000000 0.3507459 1.3015176 0.1815046 0.8150000 0.888048 0.7424563
##      NO32h0    NO32h2    NO37f0     NO37f2     NO37h0     NO37h2    NO40f0
## 1  3.445509 0.7533388 1.3913043 0.70336486  0.8315976  0.6248568 0.4319113
## 2  2.819053 0.2511129 1.5652174 0.51987838  0.2771992  0.1041428 1.5116897
## 3  2.270904 2.2600164 0.0000000 0.06116216  2.7719920  4.1309976 0.0000000
## 4 11.354519 5.6500410 0.8695652 1.52905405 12.4739641 18.9887033 0.2159557
## 5  1.487834 0.5022259 0.1739130 0.88685135  0.0000000  0.4165712 0.2159557
## 6  1.017991 0.6277823 0.6956522 0.64220270  0.5543984  0.9719994 4.1031577
##      NO40f2     NO40h0     NO40h2     NO47f0    NO47f2    NO47h0     NO47h2
## 1 0.2227340  1.5172918  0.6420715 3.87590296 0.6037583 2.6342518  0.8107016
## 2 0.8909359  1.2414206  1.0701191 5.04768758 0.0000000 4.2148030  1.0133770
## 3 0.0000000  2.4828412  3.8524289 0.09013728 0.0000000 0.3292815  1.8240786
## 4 0.2227340 19.7247936 26.3249305 0.99151006 0.0000000 2.1732578 13.1739008
## 5 0.4454679  0.8276137  0.8560953 1.26192189 0.3018791 0.7902756  1.6214032
## 6 0.2227340  1.5172918  0.2140238 1.08164734 0.0000000 0.3951378  0.4053508
##      NO48f0   NO48f2   NO48h0    NO48h2    NO50f0    NO50f2    NO50h0
## 1 5.3129401 0.000000 4.318752 0.1964405 8.3852185 0.0000000  2.259935
## 2 3.3809619 0.678554 2.670807 1.3750835 4.3922573 0.0000000  3.884264
## 3 0.0000000 0.000000 2.329853 1.9644050 0.0000000 0.0000000  1.906820
## 4 0.4829946 0.339277 9.148934 9.0362629 0.9316909 0.3839242 18.715090
## 5 1.2419860 0.000000 1.193339 0.0000000 1.0647897 0.3839242  1.977443
## 6 1.5179829 2.374939 1.420642 1.5715240 0.9316909 1.1517725  1.906820
##       NO50h2    NO51f0    NO51f2    NO51h0    NO51h2    NO56f0    NO56f2
## 1  0.0000000 0.7592828 0.6775729 2.0290429 0.4671365 1.7579734 0.8544474
## 2  0.2773026 2.3727587 0.1693932 1.8261386 0.3114243 3.5159468 0.2136119
## 3  1.6638155 0.0000000 0.0000000 0.4058086 1.0899851 0.0000000 0.0000000
## 4 15.2516424 0.8541931 0.3387864 0.9130693 3.4256675 0.6392631 0.2136119
## 5  0.0000000 1.1389242 0.3387864 1.2174257 0.0000000 0.4794473 0.0000000
## 6  2.2184207 1.6134759 0.5081797 1.7246865 0.3114243 0.6392631 0.8544474
##      NO56h0    NO56h2    NO68f0    NO68f2    NO68h0     NO68h2    NO73f0
## 1 2.8046540 0.7082324 1.5635631 0.0000000 0.6244423  0.1909014 0.3046655
## 2 2.8804555 0.7082324 0.6700985 0.8940396 1.4570320  0.5727042 0.6093311
## 3 0.4548088 0.7082324 0.1116831 0.0000000 0.2081474  1.5272113 0.0000000
## 4 1.1370219 3.7182198 1.7869292 0.6705297 8.6381184 11.4540846 0.9139966
## 5 0.9854190 0.1770581 2.4570277 0.2235099 2.9140640  0.3818028 0.3046655
## 6 2.2740438 0.3541162 1.0051477 1.3410593 0.6244423  0.3818028 0.9139966
##      NO73f2     NO73h0    NO73h2    NO77f0    NO77f2    NO77h0    NO77h2
## 1 0.3800955  0.5525031 0.2281805 1.6747464 0.1858686  2.008137 0.2179572
## 2 0.0000000  0.0000000 0.2281805 2.2708426 0.1858686  2.920926 0.4359145
## 3 0.0000000  1.9337610 0.0000000 1.0786502 0.0000000  2.669909 1.7436579
## 4 1.5203819 14.9175845 1.3690828 9.4239968 0.1858686 16.909424 3.4873158
## 5 0.0000000  0.8287547 0.0000000 1.3625056 0.0000000  1.665841 0.2179572
## 6 0.3800955  0.0000000 0.6845414 0.6812528 0.9293432  1.346364 0.0000000
##      NO89f0    NO89f2    NO89h0    NO89h2
## 1 4.5265527 0.3802715 3.8222503 0.0000000
## 2 4.8985981 0.1901358 3.3635802 0.0000000
## 3 0.0000000 0.1901358 1.8346801 1.6303852
## 4 1.7362120 0.3168929 9.4282173 7.0650024
## 5 2.1702650 0.3802715 1.8856435 0.2717309
## 6 0.9301136 0.4436501 0.3567434 1.3586543
merged_out <- merged[, c("gene", colnames(merged)[!(colnames(merged) %in% c("TC", "gene"))])] # gene column to the first column and remove TC column
str(merged_out)
## 'data.frame':    29189 obs. of  139 variables:
##  $ gene   : chr  NA NA NA NA ...
##  $ NG1f0  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ NG1h0  : num  0.488 0.163 3.903 18.867 0.163 ...
##  $ NG2f0  : num  1.657 2.761 0.789 5.088 2.051 ...
##  $ NG2h0  : num  1.21 2.07 1.31 11.52 1.11 ...
##  $ NG3f0  : num  2.297 3.55 0 0.626 0.209 ...
##  $ NG3h0  : num  2.89 2.76 1.13 6.28 1.63 ...
##  $ NG4f0  : num  2.314 1.662 0 1.78 0.297 ...
##  $ NG4h0  : num  2.91 1.89 1.07 7.49 2.04 ...
##  $ NK13f0 : num  0 0 0 0.578 0 ...
##  $ NK13h0 : num  0 0 5.3 22.28 0.73 ...
##  $ NK14f0 : num  0 0 0 0 0 ...
##  $ NK14h0 : num  0.544 0.126 3.514 18.115 0.293 ...
##  $ NK15f0 : num  0.575 0.767 0.192 0.767 0.384 ...
##  $ NK15h0 : num  1.181 0 0.844 4.387 0.506 ...
##  $ NK22f0 : num  0 0 0 1.01 0 ...
##  $ NK22h0 : num  0 0.222 1.994 27.471 0 ...
##  $ NK25f0 : num  0.5708 0.2283 0.5137 0.4567 0.0571 ...
##  $ NK25h0 : num  1.166 0.389 1.813 7.317 0.518 ...
##  $ NK26f0 : num  0.62 0.62 0.31 0.31 0.155 ...
##  $ NK26h0 : num  0.433 0.65 1.299 5.197 0.65 ...
##  $ NK27f0 : num  0 0.356 0 1.424 0 ...
##  $ NK27h0 : num  0.26 0 3.38 31.75 0 ...
##  $ NK29f0 : num  3.76 2.96 1.42 9.2 2.34 ...
##  $ NK29h0 : num  1.674 1.256 2.86 12.698 0.767 ...
##  $ NK2f0  : num  0 0 1.11 0 0 ...
##  $ NK2h0  : num  0.668 0.445 2.226 8.46 0.445 ...
##  $ NK35f0 : num  1.315 0.658 0 0.658 0.986 ...
##  $ NK35h0 : num  1.227 0.701 0.175 3.682 0 ...
##  $ NK37f0 : num  1.455 0.728 0 2.547 0.364 ...
##  $ NK37h0 : num  0 0 0 2.86 0 ...
##  $ NK38f0 : num  0.456 0.608 0.152 0.304 0.304 ...
##  $ NK38h0 : num  0.969 0.646 0.646 5.169 0.323 ...
##  $ NK39f0 : num  0.218 0.109 0.765 0.109 0 ...
##  $ NK39h0 : num  0.514 0.343 1.715 6.801 0.286 ...
##  $ NK3f0  : num  4.15 4.15 0 4.15 0 ...
##  $ NK3h0  : num  3.64 18.22 0 18.22 3.64 ...
##  $ NK40f0 : num  5.742 1.744 1.308 2.835 0.291 ...
##  $ NK40h0 : num  5.57 3.112 1.147 5.897 0.819 ...
##  $ NK44f0 : num  0 0 0 0.911 0 ...
##  $ NK44h0 : num  0 0 5.25 27.82 0 ...
##  $ NK7f0  : num  0 0 0 0.713 0 ...
##  $ NK7h0  : num  0 0 3.24 23.86 0 ...
##  $ NK8f0  : num  0.751 0.5 0 0.5 0 ...
##  $ NK8h0  : num  0 1.434 1.195 8.368 0.239 ...
##  $ NK9f0  : num  0.7996 0 0.4305 0.4921 0.0615 ...
##  $ NK9h0  : num  1.211 0.323 1.938 7.064 0.565 ...
##  $ NO101f0: num  1.62 2.71 1.22 8.67 1.22 ...
##  $ NO101f2: num  0.922 0.922 0 1.537 0 ...
##  $ NO101h0: num  2.21 2.88 2.67 18.01 1.39 ...
##  $ NO101h2: num  0 0 5.43 13.57 0 ...
##  $ NO108f0: num  3.87 4.958 0.121 1.391 1.27 ...
##  $ NO108f2: num  0.312 0.937 0 0.937 0 ...
##  $ NO108h0: num  4.33 2.36 1.26 9.37 1.92 ...
##  $ NO108h2: num  0.48 0.96 0.16 1.44 0 ...
##  $ NO117f0: num  4.187 4.187 0 0.785 2.093 ...
##  $ NO117f2: num  0.46 0 0 0 0 ...
##  $ NO117h0: num  3.2 3.2 0 2.04 1.46 ...
##  $ NO117h2: num  0 0 2.1 13 0 ...
##  $ NO121f0: num  1.5 4.24 1.99 17.01 2.43 ...
##  $ NO121f2: num  0.599 0.3423 3.1662 16.002 0.0856 ...
##  $ NO121h0: num  2.36 4.12 2.89 16.58 2.51 ...
##  $ NO121h2: num  0 0 5 30.03 1.15 ...
##  $ NO125f0: num  0.627 0.815 0 1.254 0.125 ...
##  $ NO125f2: num  0 0 0 1.928 0.386 ...
##  $ NO125h0: num  1.377 0.197 1.869 14.163 0.787 ...
##  $ NO125h2: num  0 0 1.42 3.55 0 ...
##  $ NO131f0: num  3.8784 5.5048 0.0626 1.5639 1.4387 ...
##  $ NO131f2: num  0 0 0 0.353 0 ...
##  $ NO131h0: num  4.906 2.995 0.723 9.141 2.272 ...
##  $ NO131h2: num  1.058 0.794 1.058 8.202 0.265 ...
##  $ NO135f0: num  1.71 3.43 1.76 12.61 1.86 ...
##  $ NO135f2: num  0 0 0 1.218 0.914 ...
##  $ NO135h0: num  1.94 3.43 2.42 15.42 1.82 ...
##  $ NO135h2: num  0.247 0.742 3.709 18.792 0.247 ...
##  $ NO146f0: num  3.044 0.381 0 0.571 0.381 ...
##  $ NO146f2: num  0.908 0.726 0.545 1.816 0.454 ...
##  $ NO146h0: num  1.695 0.308 0.462 3.699 0.308 ...
##  $ NO146h2: num  0.4966 0.7945 0 2.4829 0.0993 ...
##  $ NO150f0: num  4.849 4.435 0.177 1.892 1.242 ...
##  $ NO150f2: num  0 1.478 0 1.478 0.739 ...
##  $ NO150h0: num  4.31 3.86 1.35 6.31 1.95 ...
##  $ NO150h2: num  0 0.502 0 2.008 0.502 ...
##  $ NO22f0 : num  4.571 3.291 0 1.28 0.609 ...
##  $ NO22f2 : num  0 0 0 1.3 0 ...
##  $ NO22h0 : num  2.822 2.293 0 1.587 0.882 ...
##  $ NO22h2 : num  0 0.308 0.616 4.929 0.308 ...
##  $ NO27f0 : num  3.683 3.683 0.175 1.052 1.052 ...
##  $ NO27f2 : num  0.744 0 0.372 2.231 0.372 ...
##  $ NO27h0 : num  2.9 3.27 0 2.18 1.09 ...
##  $ NO27h2 : num  0.272 0.408 0.408 2.853 0.408 ...
##  $ NO32f0 : num  3.55 3.65 0 1.78 2.27 ...
##  $ NO32f2 : num  0 0.495 0 1.732 0 ...
##  $ NO32h0 : num  3.45 2.82 2.27 11.35 1.49 ...
##  $ NO32h2 : num  0.753 0.251 2.26 5.65 0.502 ...
##  $ NO37f0 : num  1.391 1.565 0 0.87 0.174 ...
##  $ NO37f2 : num  0.7034 0.5199 0.0612 1.5291 0.8869 ...
##  $ NO37h0 : num  0.832 0.277 2.772 12.474 0 ...
##  $ NO37h2 : num  0.625 0.104 4.131 18.989 0.417 ...
##   [list output truncated]
head(merged_out)
##   gene NG1f0      NG1h0    NG2f0     NG2h0     NG3f0    NG3h0     NG4f0
## 1 <NA>     0  0.4879297 1.656625  1.212721 2.2969698 2.889605 2.3143152
## 2 <NA>     0  0.1626432 2.761041  2.071732 3.5498624 2.763970 1.6615596
## 3 <NA>     0  3.9034374 0.788869  1.313781 0.0000000 1.130715 0.0000000
## 4 <NA>     0 18.8666142 5.088205 11.520850 0.6264463 6.281750 1.7802425
## 5 <NA>     0  0.1626432 2.051059  1.111661 0.2088154 1.633255 0.2967071
## 6 <NA>     0  0.3252865 1.735512  1.010601 1.4617081 0.502540 1.8395839
##      NG4h0    NK13f0     NK13h0   NK14f0     NK14h0    NK15f0    NK15h0
## 1 2.905230 0.0000000  0.0000000 0.000000  0.5438818 0.5753598 1.1811344
## 2 1.885851 0.0000000  0.0000000 0.000000  0.1255112 0.7671463 0.0000000
## 3 1.070348 0.0000000  5.2961169 0.000000  3.5143129 0.1917866 0.8436674
## 4 7.492435 0.5775234 22.2802157 0.000000 18.1154464 0.7671463 4.3870706
## 5 2.038758 0.0000000  0.7304989 0.000000  0.2928594 0.3835732 0.5062004
## 6 1.325193 0.0000000  0.5478742 2.630139  0.5020447 0.0000000 0.6749339
##      NK22f0     NK22h0     NK25f0    NK25h0    NK26f0    NK26h0    NK27f0
## 1 0.0000000  0.0000000 0.57082415 1.1655768 0.6198429 0.4331197 0.0000000
## 2 0.0000000  0.2215411 0.22832966 0.3885256 0.6198429 0.6496796 0.3559544
## 3 0.0000000  1.9938695 0.51374174 1.8131194 0.3099215 1.2993591 0.0000000
## 4 1.0079500 27.4710911 0.45665932 7.3172321 0.3099215 5.1974365 1.4238174
## 5 0.0000000  0.0000000 0.05708242 0.5180341 0.1549607 0.6496796 0.0000000
## 6 0.3359833  0.6646232 0.45665932 2.5901706 0.3099215 0.6496796 0.7119087
##       NK27h0    NK29f0     NK29h0     NK2f0     NK2h0    NK35f0    NK35h0
## 1  0.2602594 3.7597823  1.6744068 0.0000000 0.6678806 1.3151710 1.2272255
## 2  0.0000000 2.9635931  1.2558051 0.0000000 0.4452537 0.6575855 0.7012717
## 3  3.3833717 1.4154475  2.8604450 1.1108651 2.2262686 0.0000000 0.1753179
## 4 31.7516418 9.2004084 12.6975852 0.0000000 8.4598206 0.6575855 3.6816766
## 5  0.0000000 2.3443348  0.7674365 0.0000000 0.4452537 0.9863783 0.0000000
## 6  0.0000000 0.7961892  0.3488348 0.3702884 1.7810149 0.4931891 0.8765897
##      NK37f0    NK37h0    NK38f0    NK38h0    NK39f0    NK39h0   NK3f0     NK3h0
## 1 1.4552990 0.0000000 0.4563273 0.9691018 0.2184543 0.5143809 4.14776  3.643239
## 2 0.7276495 0.0000000 0.6084364 0.6460679 0.1092271 0.3429206 4.14776 18.216197
## 3 0.0000000 0.0000000 0.1521091 0.6460679 0.7645900 1.7146031 0.00000  0.000000
## 4 2.5467733 2.8631913 0.3042182 5.1685430 0.1092271 6.8012588 4.14776 18.216197
## 5 0.3638248 0.0000000 0.3042182 0.3230339 0.0000000 0.2857672 0.00000  3.643239
## 6 0.7276495 0.5368484 0.3042182 0.9691018 0.4369085 2.2861374 0.00000  3.643239
##      NK40f0    NK40h0    NK44f0     NK44h0     NK7f0     NK7h0     NK8f0
## 1 5.7418925 5.5697247 0.0000000  0.0000000 0.0000000  0.000000 0.7505006
## 2 1.7443724 3.1124932 0.0000000  0.0000000 0.0000000  0.000000 0.5003337
## 3 1.3082793 1.1467080 0.0000000  5.2481620 0.0000000  3.235113 0.0000000
## 4 2.8346052 5.8973555 0.9107489 27.8152584 0.7128785 23.858956 0.5003337
## 5 0.2907287 0.8190772 0.0000000  0.0000000 0.0000000  0.000000 0.0000000
## 6 1.5990080 1.1467080 1.3661233  0.5248162 0.3564393  0.000000 0.5003337
##       NK8h0      NK9f0     NK9h0  NO101f0   NO101f2   NO101h0   NO101h2
## 1 0.0000000 0.79958731 1.2110521 1.623007 0.9223195  2.207386  0.000000
## 2 1.4344870 0.00000000 0.3229472 2.713880 0.9223195  2.881219  0.000000
## 3 1.1954058 0.43054702 1.9376834 1.223907 0.0000000  2.672099  5.428387
## 4 8.3678407 0.49205373 7.0644706 8.673774 1.5371991 18.007621 13.570967
## 5 0.2390812 0.06150672 0.5651577 1.223907 0.0000000  1.394138  0.000000
## 6 1.1954058 0.43054702 2.3817358 1.037660 1.5371991  1.022368  0.000000
##     NO108f0   NO108f2   NO108h0   NO108h2   NO117f0  NO117f2  NO117h0
## 1 3.8699337 0.3124314 4.3292525 0.4802384 4.1866810 0.460191 3.203447
## 2 4.9583526 0.9372941 2.3564286 0.9604769 4.1866810 0.000000 3.203447
## 3 0.1209354 0.0000000 1.2604153 0.1600795 0.0000000 0.000000 0.000000
## 4 1.3907574 0.9372941 9.3709136 1.4407153 0.7850027 0.000000 2.038557
## 5 1.2698220 0.0000000 1.9180232 0.0000000 2.0933405 0.000000 1.456112
## 6 0.7860803 0.0000000 0.3836046 0.9604769 1.0466702 0.460191 1.747335
##      NO117h2    NO121f0     NO121f2    NO121h0   NO121h2   NO125f0   NO125f2
## 1  0.0000000  1.5021482  0.59900377  2.3560599  0.000000 0.6270394 0.0000000
## 2  0.0000000  4.2413595  0.34228787  4.1156490  0.000000 0.8151513 0.0000000
## 3  2.0985182  1.9881373  3.16616279  2.8928837  5.004294 0.0000000 0.0000000
## 4 12.9726578 17.0096189 16.00195789 16.5818903 30.025764 1.2540789 1.9278469
## 5  0.0000000  2.4299456  0.08557197  2.5051777  1.154837 0.1254079 0.3855694
## 6  0.3815488  0.7952549  0.34228787  0.8052357  1.539783 2.3200459 0.3855694
##      NO125h0   NO125h2    NO131f0   NO131f2   NO131h0   NO131h2   NO135f0
## 1  1.3769786 0.0000000 3.87836375 0.0000000 4.9061389 1.0583363  1.713434
## 2  0.1967112 0.0000000 5.50477436 0.0000000 2.9953269 0.7937522  3.426867
## 3  1.8687566 1.4180002 0.06255425 0.0000000 0.7230099 1.0583363  1.761029
## 4 14.1632082 3.5450006 1.56385635 0.3529386 9.1409113 8.2021062 12.612775
## 5  0.7868449 0.0000000 1.43874784 0.0000000 2.2723169 0.2645841  1.856220
## 6  0.8852005 0.3545001 0.81320530 1.7646930 0.4131485 0.0000000  1.237480
##     NO135f2    NO135h0    NO135h2   NO146f0   NO146f2   NO146h0   NO146h2
## 1 0.0000000  1.9372761  0.2472569 3.0444569 0.9079499 1.6952729 0.4965870
## 2 0.0000000  3.4305931  0.7417708 0.3805571 0.7263599 0.3082314 0.7945392
## 3 0.0000000  2.4215951  3.7088540 0.0000000 0.5447699 0.4623471 0.0000000
## 4 1.2182694 15.4174888 18.7915268 0.5708357 1.8158997 3.6987771 2.4829350
## 5 0.9137021  1.8161963  0.2472569 0.3805571 0.4539749 0.3082314 0.0993174
## 6 0.0000000  0.8071984  0.4945139 0.3805571 0.5447699 0.3082314 0.5959044
##    NO150f0   NO150f2   NO150h0   NO150h2    NO22f0    NO22f2    NO22h0
## 1 4.848906 0.0000000 4.3087060 0.0000000 4.5709747 0.0000000 2.8216199
## 2 4.434975 1.4782425 3.8577949 0.5019471 3.2911018 0.0000000 2.2925662
## 3 0.177399 0.0000000 1.3527333 0.0000000 0.0000000 0.0000000 0.0000000
## 4 1.892256 1.4782425 6.3127552 2.0077882 1.2798729 1.2984748 1.5871612
## 5 1.241793 0.7391212 1.9539480 0.5019471 0.6094633 0.0000000 0.8817562
## 6 1.005261 0.7391212 0.3507086 0.5019471 0.7923023 0.4328249 2.5570931
##      NO22h2    NO27f0    NO27f2    NO27h0    NO27h2   NO32f0    NO32f2
## 1 0.0000000 3.6828320 0.7437243 2.9040732 0.2716667 3.552192 0.0000000
## 2 0.3080479 3.6828320 0.0000000 3.2670823 0.4075000 3.650864 0.4949708
## 3 0.6160959 0.1753730 0.3718622 0.0000000 0.4075000 0.000000 0.0000000
## 4 4.9287670 1.0522377 2.2311729 2.1780549 2.8525001 1.776096 1.7323980
## 5 0.3080479 1.0522377 0.3718622 1.0890274 0.4075000 2.269456 0.0000000
## 6 0.0000000 0.3507459 1.3015176 0.1815046 0.8150000 0.888048 0.7424563
##      NO32h0    NO32h2    NO37f0     NO37f2     NO37h0     NO37h2    NO40f0
## 1  3.445509 0.7533388 1.3913043 0.70336486  0.8315976  0.6248568 0.4319113
## 2  2.819053 0.2511129 1.5652174 0.51987838  0.2771992  0.1041428 1.5116897
## 3  2.270904 2.2600164 0.0000000 0.06116216  2.7719920  4.1309976 0.0000000
## 4 11.354519 5.6500410 0.8695652 1.52905405 12.4739641 18.9887033 0.2159557
## 5  1.487834 0.5022259 0.1739130 0.88685135  0.0000000  0.4165712 0.2159557
## 6  1.017991 0.6277823 0.6956522 0.64220270  0.5543984  0.9719994 4.1031577
##      NO40f2     NO40h0     NO40h2     NO47f0    NO47f2    NO47h0     NO47h2
## 1 0.2227340  1.5172918  0.6420715 3.87590296 0.6037583 2.6342518  0.8107016
## 2 0.8909359  1.2414206  1.0701191 5.04768758 0.0000000 4.2148030  1.0133770
## 3 0.0000000  2.4828412  3.8524289 0.09013728 0.0000000 0.3292815  1.8240786
## 4 0.2227340 19.7247936 26.3249305 0.99151006 0.0000000 2.1732578 13.1739008
## 5 0.4454679  0.8276137  0.8560953 1.26192189 0.3018791 0.7902756  1.6214032
## 6 0.2227340  1.5172918  0.2140238 1.08164734 0.0000000 0.3951378  0.4053508
##      NO48f0   NO48f2   NO48h0    NO48h2    NO50f0    NO50f2    NO50h0
## 1 5.3129401 0.000000 4.318752 0.1964405 8.3852185 0.0000000  2.259935
## 2 3.3809619 0.678554 2.670807 1.3750835 4.3922573 0.0000000  3.884264
## 3 0.0000000 0.000000 2.329853 1.9644050 0.0000000 0.0000000  1.906820
## 4 0.4829946 0.339277 9.148934 9.0362629 0.9316909 0.3839242 18.715090
## 5 1.2419860 0.000000 1.193339 0.0000000 1.0647897 0.3839242  1.977443
## 6 1.5179829 2.374939 1.420642 1.5715240 0.9316909 1.1517725  1.906820
##       NO50h2    NO51f0    NO51f2    NO51h0    NO51h2    NO56f0    NO56f2
## 1  0.0000000 0.7592828 0.6775729 2.0290429 0.4671365 1.7579734 0.8544474
## 2  0.2773026 2.3727587 0.1693932 1.8261386 0.3114243 3.5159468 0.2136119
## 3  1.6638155 0.0000000 0.0000000 0.4058086 1.0899851 0.0000000 0.0000000
## 4 15.2516424 0.8541931 0.3387864 0.9130693 3.4256675 0.6392631 0.2136119
## 5  0.0000000 1.1389242 0.3387864 1.2174257 0.0000000 0.4794473 0.0000000
## 6  2.2184207 1.6134759 0.5081797 1.7246865 0.3114243 0.6392631 0.8544474
##      NO56h0    NO56h2    NO68f0    NO68f2    NO68h0     NO68h2    NO73f0
## 1 2.8046540 0.7082324 1.5635631 0.0000000 0.6244423  0.1909014 0.3046655
## 2 2.8804555 0.7082324 0.6700985 0.8940396 1.4570320  0.5727042 0.6093311
## 3 0.4548088 0.7082324 0.1116831 0.0000000 0.2081474  1.5272113 0.0000000
## 4 1.1370219 3.7182198 1.7869292 0.6705297 8.6381184 11.4540846 0.9139966
## 5 0.9854190 0.1770581 2.4570277 0.2235099 2.9140640  0.3818028 0.3046655
## 6 2.2740438 0.3541162 1.0051477 1.3410593 0.6244423  0.3818028 0.9139966
##      NO73f2     NO73h0    NO73h2    NO77f0    NO77f2    NO77h0    NO77h2
## 1 0.3800955  0.5525031 0.2281805 1.6747464 0.1858686  2.008137 0.2179572
## 2 0.0000000  0.0000000 0.2281805 2.2708426 0.1858686  2.920926 0.4359145
## 3 0.0000000  1.9337610 0.0000000 1.0786502 0.0000000  2.669909 1.7436579
## 4 1.5203819 14.9175845 1.3690828 9.4239968 0.1858686 16.909424 3.4873158
## 5 0.0000000  0.8287547 0.0000000 1.3625056 0.0000000  1.665841 0.2179572
## 6 0.3800955  0.0000000 0.6845414 0.6812528 0.9293432  1.346364 0.0000000
##      NO89f0    NO89f2    NO89h0    NO89h2
## 1 4.5265527 0.3802715 3.8222503 0.0000000
## 2 4.8985981 0.1901358 3.3635802 0.0000000
## 3 0.0000000 0.1901358 1.8346801 1.6303852
## 4 1.7362120 0.3168929 9.4282173 7.0650024
## 5 2.1702650 0.3802715 1.8856435 0.2717309
## 6 0.9301136 0.4436501 0.3567434 1.3586543
write.table(merged_out, file = "ExpressionTable_with_gene.tsv", sep = "\t", row.names = FALSE, quote = FALSE)
# 1. データの読み込み
exp <- read.table("ExpressionTable.txt", header = FALSE, sep = "\t", stringsAsFactors = FALSE)
annot <- read.table("Annotation.txt", header = TRUE, sep = "\t", stringsAsFactors = FALSE)

# 2. 位置情報 -> 遺伝子名のベースマッピングを作成
position_to_gene <- setNames(annot$gene, annot$TC)

# 3. ExpressionTableの1列目(位置情報)を基に、gene名をマッピング
original_positions <- exp$V1
mapped_genes <- ifelse(original_positions %in% names(position_to_gene),
                       position_to_gene[original_positions],
                       original_positions)

# 4. 同じgene名が複数回出現した場合に .1, .2, ... を付けてユニークにする
make_unique <- function(x) {
  ave(x, x, FUN = function(y) {
    if (length(y) == 1) return(y)
    paste0(y[1], ".", seq_along(y))
  })
}
unique_genes <- make_unique(mapped_genes)

# 5. ExpressionTableの1列目を置換
exp$V1 <- unique_genes

# 6. 対応表を作成(元の位置情報と新しいgene名の対応)
mapping_table <- data.frame(Position = original_positions, GeneName = unique_genes, stringsAsFactors = FALSE)

# 7. 出力
write.table(exp, file = "ExpressionTable_annotated.txt", sep = "\t", quote = FALSE, row.names = FALSE, col.names = FALSE)
write.table(mapping_table, file = "Position_to_Gene_mapping.txt", sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE)
# 1. アノテーション済み発現データを読み込む
exp_annot <- read.table("ExpressionTable_annotated.txt", header = TRUE, sep = "\t", stringsAsFactors = FALSE)

# 2. 最初の列(遺伝子名)と残りの数値部分に分ける
gene_names_full <- exp_annot[, 1]
expression_values <- exp_annot[, -1]
expression_values <- apply(expression_values, 2, as.numeric)  # 数値に変換

# 3. 接尾辞(.1, .2 など)を取り除いて、ベースの遺伝子名だけを使う
gene_names_base <- sub("\\.\\d+$", "", gene_names_full)

# 4. 同じベース名ごとに合計
summarized <- aggregate(expression_values, by = list(Gene = gene_names_base), FUN = sum)

# 5. 結果を保存
write.table(summarized, file = "ExpressionTable_annotated_gene.txt", sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE)

Differential expression analysis using edgeR

library(edgeR)
library(dplyr)

# データ読み込み
counts <- read.table("ExpressionTable_annotated_gene.txt", sep="\t", header=TRUE, row.names=1, check.names=FALSE, stringsAsFactors=FALSE)
cohort <- read.table("Cohort.txt", header = TRUE, sep = "\t", stringsAsFactors = FALSE)

# 一致するサンプルのみ抽出
common_samples <- intersect(colnames(counts), cohort$Cond)
counts <- counts[, common_samples]
cohort <- cohort[cohort$Cond %in% common_samples, ]

# group(NO.f, NO.h, OB.f, OB.h, POB.f, POB.h など)を因子として定義
group <- factor(cohort$newcond3)

# デザインマトリクスの作成
design <- model.matrix(~ 0 + group)  # インターセプトなしで各群を列に
colnames(design) <- levels(group)   # 列名を分かりやすく

dge <- DGEList(counts = counts, group = group)

keep <- filterByExpr(dge, design)
dge <- dge[keep, , keep.lib.sizes = FALSE]
dge <- calcNormFactors(dge)

dge <- estimateDisp(dge, design)
fit <- glmQLFit(dge, design)

# 比較したいペア(h vs f)
contrast_NO  <- makeContrasts(NO.h - NO.f, levels = design)
contrast_OB  <- makeContrasts(OB.h - OB.f, levels = design)
contrast_POB <- makeContrasts(POB.h - POB.f, levels = design)


qlf_NO  <- glmQLFTest(fit, contrast = contrast_NO)
qlf_OB  <- glmQLFTest(fit, contrast = contrast_OB)
qlf_POB <- glmQLFTest(fit, contrast = contrast_POB)

# 結果取得
res_NO  <- topTags(qlf_NO, n = Inf)$table
res_OB  <- topTags(qlf_OB, n = Inf)$table
res_POB <- topTags(qlf_POB, n = Inf)$table

# 保存
write.csv(res_NO, "edgeR_NO_h_vs_f_gene.csv")
write.csv(res_OB, "edgeR_OB_h_vs_f_gene.csv")
write.csv(res_POB, "edgeR_POB_h_vs_f_gene.csv")

# plot_volcano function that i defined earlier
plot_volcano(res_NO, title = "NO.h vs NO.f")

plot_volcano(res_OB, title = "OB.h vs OB.f")

plot_volcano(res_POB, title = "POB.h vs POB.f")

さらに遺伝子名をvolcano plotに入れる。

library(ggplot2)
library(ggrepel)

plot_volcano2 <- function(res, 
                         title = "Volcano Plot", 
                         logFC_threshold = 1, 
                         FDR_threshold = 0.05,
                         label_logFC = 2,
                         label_neglogFDR = 10) {
  res$gene <- rownames(res)
  
  res$Significant <- with(res, ifelse(FDR < FDR_threshold & abs(logFC) >= logFC_threshold,
                                      ifelse(logFC > 0, "Up", "Down"),
                                      "Not Significant"))
  
  res$label <- with(res, ifelse(abs(logFC) > label_logFC & -log10(FDR) > label_neglogFDR, gene, NA))
  
  ggplot(res, aes(x = logFC, y = -log10(FDR), color = Significant)) +
    geom_point(alpha = 0.7) +
    scale_color_manual(values = c("Up" = "red", "Down" = "blue", "Not Significant" = "grey")) +
    theme_minimal() +
    labs(title = title,
         x = "log2 Fold Change",
         y = "-log10 FDR") +
    geom_vline(xintercept = c(-logFC_threshold, logFC_threshold), linetype = "dashed", color = "black") +
    geom_hline(yintercept = -log10(FDR_threshold), linetype = "dashed", color = "black") +
    ggrepel::geom_text_repel(aes(label = label), max.overlaps = 20, segment.size = 0.2, min.segment.length = 0, size = 3)
}

plot_volcano2(res_NO, label_logFC = 1, label_neglogFDR = 10)
## Warning: Removed 14608 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

plot_volcano2(res_OB, label_logFC = 1.5, label_neglogFDR = 5)
## Warning: Removed 14612 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

plot_volcano2(res_POB, label_logFC = 2, label_neglogFDR = 10)
## Warning: Removed 14609 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

さらにfold changeで散布図を作る。

library(tibble)
# fold change だけ取り出し(行名を保持)
fc_POB <- res_POB %>% rownames_to_column("Gene") %>% select(Gene, logFC)
fc_NO <- res_NO %>% rownames_to_column("Gene") %>% select(Gene, logFC)

# フォールドチェンジのテーブルをGeneでマージ
fc_df_POBNO <- full_join(
  fc_NO %>% rename(FC_NO = logFC),
  fc_POB %>% rename(FC_POB = logFC),
  by = "Gene"
)


# 距離と象限を計算してから、上位5つずつ抽出 & ラベル
fc_df_POBNO_labeled <- fc_df_POBNO %>%
  mutate(
    quadrant = case_when(
      FC_NO < 0 & FC_POB > 0 ~ "Q2",
      FC_NO > 0 & FC_POB < 0 ~ "Q4",
      TRUE ~ "other"
    ),
    distance = sqrt(FC_NO^2 + FC_POB^2)
  )

top_q2 <- fc_df_POBNO_labeled %>% filter(quadrant == "Q2") %>% slice_max(distance, n = 5)
top_q4 <- fc_df_POBNO_labeled %>% filter(quadrant == "Q4") %>% slice_max(distance, n = 5)

# 全体の散布図
ggplot(fc_df_POBNO, aes(x = FC_NO, y = FC_POB)) +
  geom_point(alpha = 0.6) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray") +
  geom_abline(slope = 1, intercept = 0, linetype = "solid", color = "blue") +
  theme_minimal() +
  labs(
    title = "Fold Change: POB vs NO (h vs f)",
    x = "log2(FC) NO.f vs NO.h",
    y = "log2(FC) POB.f vs POB.h"
  )

# ラベル付き散布図
ggplot(fc_df_POBNO_labeled, aes(x = FC_NO, y = FC_POB, color = quadrant)) +
  geom_point(alpha = 0.6) +
  geom_text_repel(data = top_q2, aes(label = Gene), size = 3, max.overlaps = Inf, color = "black") +
  geom_text_repel(data = top_q4, aes(label = Gene), size = 3, max.overlaps = Inf, color = "black") +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray") +
  geom_abline(slope = 1, intercept = 0, linetype = "solid", color = "blue") +
  scale_color_manual(values = c(Q2 = "orange", Q4 = "orange", other = "gray")) +
  theme_minimal()

venn diagram of DEGs

# 全てのDEG
deg_NO  <- rownames(res_NO[res_NO$FDR < 0.05, ])
deg_OB  <- rownames(res_OB[res_OB$FDR < 0.05, ])
deg_POB <- rownames(res_POB[res_POB$FDR < 0.05, ])

library(VennDiagram)
## Loading required package: grid
## Loading required package: futile.logger
# リスト化
venn_list <- list(
  NO  = deg_NO,
  OB  = deg_OB,
  POB = deg_POB
)

# ベン図をファイルに出力せずにプロット
venn.plot <- venn.diagram(
  x = venn_list,
  filename = NULL,
  col = "black",
  fill = c("dodgerblue", "orange", "forestgreen"),
  alpha = 0.5,
  cex = 1.2,
  cat.cex = 1.2,
  cat.pos = 0
)
grid.newpage()
grid.draw(venn.plot)

comparison of POB.f and NO.f, and POB.h and NO.h

# 比較したいペア(h vs f)
contrast_POBNOf  <- makeContrasts(POB.f - NO.f, levels = design)
contrast_POBNOh  <- makeContrasts(POB.h - NO.h, levels = design)

qlf_POBNOf  <- glmQLFTest(fit, contrast = contrast_POBNOf)
qlf_POBNOh  <- glmQLFTest(fit, contrast = contrast_POBNOh)

# 結果取得
res_POBNOf  <- topTags(qlf_POBNOf, n = Inf)$table
res_POBNOh  <- topTags(qlf_POBNOh, n = Inf)$table

# 保存
write.csv(res_POBNOf, "edgeR_POB_f_vs_NO_f_gene.csv")
write.csv(res_POBNOh, "edgeR_POB_h_vs_NO_h_gene.csv")

# plot_volcano function that i defined earlier
plot_volcano2(res_POBNOf, label_logFC = 5, label_neglogFDR = 3, title = "POB.f vs NO.f")
## Warning: Removed 14613 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

plot_volcano2(res_POBNOh, label_logFC = 5, label_neglogFDR = 3, title = "POB.h vs NO.h")
## Warning: Removed 14612 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

# get fold change
fc_f <- res_POBNOf %>% select(logFC)
fc_h <- res_POBNOh %>% select(logFC)

# 遺伝子名を共通キーにして結合
fc_df <- full_join(
  tibble(Gene = rownames(fc_f), FC_f = fc_f$logFC),
  tibble(Gene = rownames(fc_h), FC_h = fc_h$logFC),
  by = "Gene"
)

# 散布図を描画
ggplot(fc_df, aes(x = FC_f, y = FC_h)) +
  geom_point(alpha = 0.6) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray") +
  geom_abline(slope = 1, intercept = 0, linetype = "solid", color = "blue") +
  theme_minimal() +
  labs(
    title = "Fold Change: h vs f (POB vs NO)",
    x = "log2(FC) POB.f vs NO.f",
    y = "log2(FC) POB.h vs NO.h"
  )

library(tibble)
# fold change だけ取り出し(行名を保持)
fc_f <- res_POBNOf %>% rownames_to_column("Gene") %>% select(Gene, logFC)
fc_h <- res_POBNOh %>% rownames_to_column("Gene") %>% select(Gene, logFC)

# フォールドチェンジのテーブルをGeneでマージ
fc_df <- full_join(
  fc_f %>% rename(FC_f = logFC),
  fc_h %>% rename(FC_h = logFC),
  by = "Gene"
)


# 距離と象限を計算してから、上位5つずつ抽出 & ラベル
fc_df_labeled <- fc_df %>%
  mutate(
    quadrant = case_when(
      FC_f < 0 & FC_h > 0 ~ "Q2",
      FC_f > 0 & FC_h < 0 ~ "Q4",
      TRUE ~ "other"
    ),
    distance = sqrt(FC_f^2 + FC_h^2)
  )

top_q2 <- fc_df_labeled %>% filter(quadrant == "Q2") %>% slice_max(distance, n = 5)
top_q4 <- fc_df_labeled %>% filter(quadrant == "Q4") %>% slice_max(distance, n = 5)

# ラベル付き散布図
ggplot(fc_df_labeled, aes(x = FC_f, y = FC_h, color = quadrant)) +
  geom_point(alpha = 0.6) +
  geom_text_repel(data = top_q2, aes(label = Gene), size = 3, max.overlaps = Inf, color = "black") +
  geom_text_repel(data = top_q4, aes(label = Gene), size = 3, max.overlaps = Inf, color = "black") +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray") +
  geom_abline(slope = 1, intercept = 0, linetype = "solid", color = "blue") +
  scale_color_manual(values = c(Q2 = "orange", Q4 = "orange", other = "gray")) +
  coord_cartesian(xlim = c(-2.5, 2.5), ylim = c(-2.5, 2.5)) +
  theme_minimal()

Box plot of representative genes

library(ggplot2)
library(dplyr)
library(edgeR)
library(ggsignif)

# CPMに変換
cpm_matrix <- cpm(dge, log = FALSE)

# 遺伝子指定
target_gene <- "PPP1R3B"
tss_rows <- grep(paste0("^", target_gene), rownames(cpm_matrix), value = TRUE)
expr_values <- colSums(cpm_matrix[tss_rows, , drop = FALSE])

# データ整形
plot_df <- data.frame(Sample = names(expr_values), Expression = expr_values)
plot_df <- left_join(plot_df, cohort, by = c("Sample" = "Cond"))

# 条件情報を加工
plot_df <- plot_df %>%
  mutate(
    Group = sub("\\..*", "", newcond3),
    Time = sub(".*\\.", "", newcond3),
    Group = factor(Group, levels = c("NO", "OB", "POB")),
    Time = factor(Time, levels = c("f", "h")),
    FillColor = case_when(
      Group == "NO" ~ "dodgerblue",
      Group == "OB" ~ "orange",
      Group == "POB" ~ "forestgreen"
    ),
    AlphaVal = ifelse(Time == "f", 0.3, 0.8),
    Cond = newcond3
  )

# 検定対象の条件ペア
comparisons <- list(c("NO.f", "NO.h"), c("OB.f", "OB.h"), c("POB.f", "POB.h"))

# 各ペアでt検定し、p値とアスタリスク記号を取得
pvals <- sapply(comparisons, function(pair) {
  vals1 <- plot_df$Expression[plot_df$Cond == pair[1]]
  vals2 <- plot_df$Expression[plot_df$Cond == pair[2]]
  if (length(vals1) >= 2 && length(vals2) >= 2) {
    t.test(vals1, vals2)$p.value
  } else {
    NA
  }
})

# アスタリスクレベルを決定
stars <- sapply(pvals, function(p) {
  if (is.na(p)) return(NA)
  else if (p < 0.001) return("***")
  else if (p < 0.01) return("**")
  else if (p < 0.05) return("*")
  else return(NA)
})

# 有意な比較のみ抽出
sig_comparisons <- comparisons[!is.na(stars)]
sig_stars <- stars[!is.na(stars)]

# プロット
ggplot(plot_df, aes(x = Cond, y = Expression, fill = FillColor, alpha = AlphaVal)) +
  geom_boxplot(outlier.alpha = 0.2, color = "gray30") +
  geom_jitter(width = 0.2, size = 2) +
  scale_fill_identity() +
  scale_alpha_identity() +
  geom_signif(
    comparisons = sig_comparisons,
    annotations = sig_stars,
    tip_length = 0.01,
    textsize = 5,
    color = "black"  # 横棒とアスタリスクを黒で描画
  ) +
  theme_minimal() +
  labs(
    title = paste0(target_gene, " expression (CPM) across conditions"),
    x = "Condition", y = "CPM (Counts per Million)"
  )

library(ggplot2)
library(dplyr)
library(edgeR)
library(ggsignif)

# CPMに変換
cpm_matrix <- cpm(dge, log = FALSE)

# 遺伝子指定
target_gene <- "SLC2A4"
tss_rows <- grep(paste0("^", target_gene), rownames(cpm_matrix), value = TRUE)
expr_values <- colSums(cpm_matrix[tss_rows, , drop = FALSE])

# データ整形
plot_df <- data.frame(Sample = names(expr_values), Expression = expr_values)
plot_df <- left_join(plot_df, cohort, by = c("Sample" = "Cond"))

# 条件情報を加工
plot_df <- plot_df %>%
  mutate(
    Group = sub("\\..*", "", newcond3),
    Time = sub(".*\\.", "", newcond3),
    Group = factor(Group, levels = c("NO", "OB", "POB")),
    Time = factor(Time, levels = c("f", "h")),
    FillColor = case_when(
      Group == "NO" ~ "dodgerblue",
      Group == "OB" ~ "orange",
      Group == "POB" ~ "forestgreen"
    ),
    AlphaVal = ifelse(Time == "f", 0.3, 0.8),
    Cond = newcond3
  )

# 検定対象の条件ペア
comparisons <- list(c("NO.f", "NO.h"), c("OB.f", "OB.h"), c("POB.f", "POB.h"))

# 各ペアでt検定し、p値とアスタリスク記号を取得
pvals <- sapply(comparisons, function(pair) {
  vals1 <- plot_df$Expression[plot_df$Cond == pair[1]]
  vals2 <- plot_df$Expression[plot_df$Cond == pair[2]]
  if (length(vals1) >= 2 && length(vals2) >= 2) {
    t.test(vals1, vals2)$p.value
  } else {
    NA
  }
})

# アスタリスクレベルを決定
stars <- sapply(pvals, function(p) {
  if (is.na(p)) return(NA)
  else if (p < 0.001) return("***")
  else if (p < 0.01) return("**")
  else if (p < 0.05) return("*")
  else return(NA)
})

# 有意な比較のみ抽出
sig_comparisons <- comparisons[!is.na(stars)]
sig_stars <- stars[!is.na(stars)]

# プロット
ggplot(plot_df, aes(x = Cond, y = Expression, fill = FillColor, alpha = AlphaVal)) +
  geom_boxplot(outlier.alpha = 0.2, color = "gray30") +
  geom_jitter(width = 0.2, size = 2) +
  scale_fill_identity() +
  scale_alpha_identity() +
  geom_signif(
    comparisons = sig_comparisons,
    annotations = sig_stars,
    tip_length = 0.01,
    textsize = 5,
    color = "black"  # 横棒とアスタリスクを黒で描画
  ) +
  theme_minimal() +
  labs(
    title = paste0(target_gene, " expression (CPM) across conditions"),
    x = "Condition", y = "CPM (Counts per Million)"
  )

library(ggplot2)
library(dplyr)
library(edgeR)
library(ggsignif)

# CPMに変換
cpm_matrix <- cpm(dge, log = FALSE)

# 遺伝子指定
target_gene <- "PPARGC1A"
tss_rows <- grep(paste0("^", target_gene), rownames(cpm_matrix), value = TRUE)
expr_values <- colSums(cpm_matrix[tss_rows, , drop = FALSE])

# データ整形
plot_df <- data.frame(Sample = names(expr_values), Expression = expr_values)
plot_df <- left_join(plot_df, cohort, by = c("Sample" = "Cond"))

# 条件情報を加工
plot_df <- plot_df %>%
  mutate(
    Group = sub("\\..*", "", newcond3),
    Time = sub(".*\\.", "", newcond3),
    Group = factor(Group, levels = c("NO", "OB", "POB")),
    Time = factor(Time, levels = c("f", "h")),
    FillColor = case_when(
      Group == "NO" ~ "dodgerblue",
      Group == "OB" ~ "orange",
      Group == "POB" ~ "forestgreen"
    ),
    AlphaVal = ifelse(Time == "f", 0.3, 0.8),
    Cond = newcond3
  )

# 検定対象の条件ペア
comparisons <- list(c("NO.f", "NO.h"), c("OB.f", "OB.h"), c("POB.f", "POB.h"))

# 各ペアでt検定し、p値とアスタリスク記号を取得
pvals <- sapply(comparisons, function(pair) {
  vals1 <- plot_df$Expression[plot_df$Cond == pair[1]]
  vals2 <- plot_df$Expression[plot_df$Cond == pair[2]]
  if (length(vals1) >= 2 && length(vals2) >= 2) {
    t.test(vals1, vals2)$p.value
  } else {
    NA
  }
})

# アスタリスクレベルを決定
stars <- sapply(pvals, function(p) {
  if (is.na(p)) return(NA)
  else if (p < 0.001) return("***")
  else if (p < 0.01) return("**")
  else if (p < 0.05) return("*")
  else return(NA)
})

# 有意な比較のみ抽出
sig_comparisons <- comparisons[!is.na(stars)]
sig_stars <- stars[!is.na(stars)]

# プロット
ggplot(plot_df, aes(x = Cond, y = Expression, fill = FillColor, alpha = AlphaVal)) +
  geom_boxplot(outlier.alpha = 0.2, color = "gray30") +
  geom_jitter(width = 0.2, size = 2) +
  scale_fill_identity() +
  scale_alpha_identity() +
  geom_signif(
    comparisons = sig_comparisons,
    annotations = sig_stars,
    tip_length = 0.01,
    textsize = 5,
    color = "black"  # 横棒とアスタリスクを黒で描画
  ) +
  theme_minimal() +
  labs(
    title = paste0(target_gene, " expression (CPM) across conditions"),
    x = "Condition", y = "CPM (Counts per Million)"
  )

library(ggplot2)
library(dplyr)
library(edgeR)
library(ggsignif)

# CPMに変換
cpm_matrix <- cpm(dge, log = FALSE)

# 遺伝子指定
target_gene <- "PPARG"
tss_rows <- grep(paste0("^", target_gene), rownames(cpm_matrix), value = TRUE)
expr_values <- colSums(cpm_matrix[tss_rows, , drop = FALSE])

# データ整形
plot_df <- data.frame(Sample = names(expr_values), Expression = expr_values)
plot_df <- left_join(plot_df, cohort, by = c("Sample" = "Cond"))

# 条件情報を加工
plot_df <- plot_df %>%
  mutate(
    Group = sub("\\..*", "", newcond3),
    Time = sub(".*\\.", "", newcond3),
    Group = factor(Group, levels = c("NO", "OB", "POB")),
    Time = factor(Time, levels = c("f", "h")),
    FillColor = case_when(
      Group == "NO" ~ "dodgerblue",
      Group == "OB" ~ "orange",
      Group == "POB" ~ "forestgreen"
    ),
    AlphaVal = ifelse(Time == "f", 0.3, 0.8),
    Cond = newcond3
  )

# 検定対象の条件ペア
comparisons <- list(c("NO.f", "NO.h"), c("OB.f", "OB.h"), c("POB.f", "POB.h"))

# 各ペアでt検定し、p値とアスタリスク記号を取得
pvals <- sapply(comparisons, function(pair) {
  vals1 <- plot_df$Expression[plot_df$Cond == pair[1]]
  vals2 <- plot_df$Expression[plot_df$Cond == pair[2]]
  if (length(vals1) >= 2 && length(vals2) >= 2) {
    t.test(vals1, vals2)$p.value
  } else {
    NA
  }
})

# アスタリスクレベルを決定
stars <- sapply(pvals, function(p) {
  if (is.na(p)) return(NA)
  else if (p < 0.001) return("***")
  else if (p < 0.01) return("**")
  else if (p < 0.05) return("*")
  else return(NA)
})

# 有意な比較のみ抽出
sig_comparisons <- comparisons[!is.na(stars)]
sig_stars <- stars[!is.na(stars)]

# プロット
ggplot(plot_df, aes(x = Cond, y = Expression, fill = FillColor, alpha = AlphaVal)) +
  geom_boxplot(outlier.alpha = 0.2, color = "gray30") +
  geom_jitter(width = 0.2, size = 2) +
  scale_fill_identity() +
  scale_alpha_identity() +
  geom_signif(
    comparisons = sig_comparisons,
    annotations = sig_stars,
    tip_length = 0.01,
    textsize = 5,
    color = "black"  # 横棒とアスタリスクを黒で描画
  ) +
  theme_minimal() +
  labs(
    title = paste0(target_gene, " expression (CPM) across conditions"),
    x = "Condition", y = "CPM (Counts per Million)"
  )

library(ggplot2)
library(dplyr)
library(edgeR)
library(ggsignif)

# CPMに変換
cpm_matrix <- cpm(dge, log = FALSE)

# 遺伝子指定
target_gene <- "ADIPOQ"
tss_rows <- grep(paste0("^", target_gene), rownames(cpm_matrix), value = TRUE)
expr_values <- colSums(cpm_matrix[tss_rows, , drop = FALSE])

# データ整形
plot_df <- data.frame(Sample = names(expr_values), Expression = expr_values)
plot_df <- left_join(plot_df, cohort, by = c("Sample" = "Cond"))

# 条件情報を加工
plot_df <- plot_df %>%
  mutate(
    Group = sub("\\..*", "", newcond3),
    Time = sub(".*\\.", "", newcond3),
    Group = factor(Group, levels = c("NO", "OB", "POB")),
    Time = factor(Time, levels = c("f", "h")),
    FillColor = case_when(
      Group == "NO" ~ "dodgerblue",
      Group == "OB" ~ "orange",
      Group == "POB" ~ "forestgreen"
    ),
    AlphaVal = ifelse(Time == "f", 0.3, 0.8),
    Cond = newcond3
  )

# 検定対象の条件ペア
comparisons <- list(c("NO.f", "NO.h"), c("OB.f", "OB.h"), c("POB.f", "POB.h"))

# 各ペアでt検定し、p値とアスタリスク記号を取得
pvals <- sapply(comparisons, function(pair) {
  vals1 <- plot_df$Expression[plot_df$Cond == pair[1]]
  vals2 <- plot_df$Expression[plot_df$Cond == pair[2]]
  if (length(vals1) >= 2 && length(vals2) >= 2) {
    t.test(vals1, vals2)$p.value
  } else {
    NA
  }
})

# アスタリスクレベルを決定
stars <- sapply(pvals, function(p) {
  if (is.na(p)) return(NA)
  else if (p < 0.001) return("***")
  else if (p < 0.01) return("**")
  else if (p < 0.05) return("*")
  else return(NA)
})

# 有意な比較のみ抽出
sig_comparisons <- comparisons[!is.na(stars)]
sig_stars <- stars[!is.na(stars)]

# プロット
p <- ggplot(plot_df, aes(x = Cond, y = Expression, fill = FillColor, alpha = AlphaVal)) +
  geom_boxplot(outlier.alpha = 0.2, color = "gray30") +
  geom_jitter(width = 0.2, size = 2) +
  scale_fill_identity() +
  scale_alpha_identity() +
  theme_minimal() +
  labs(
    title = paste0(target_gene, " expression (CPM) across conditions"),
    x = "Condition", y = "CPM (Counts per Million)"
  )

# アスタリスクがあれば追加する
if (length(sig_stars) > 0) {
  p <- p + geom_signif(
    comparisons = sig_comparisons,
    annotations = sig_stars,
    tip_length = 0.01,
    textsize = 5,
    color = "black"
  )
}

# 表示
print(p)

changed the code because without significant difference the former code put out an error and cannnot plot.

library(ggplot2)
library(dplyr)
library(edgeR)
library(ggsignif)

# CPMに変換
cpm_matrix <- cpm(dge, log = FALSE)

# 遺伝子指定
target_gene <- "IL6"
tss_rows <- grep(paste0("^", target_gene), rownames(cpm_matrix), value = TRUE)
expr_values <- colSums(cpm_matrix[tss_rows, , drop = FALSE])

# データ整形
plot_df <- data.frame(Sample = names(expr_values), Expression = expr_values)
plot_df <- left_join(plot_df, cohort, by = c("Sample" = "Cond"))

# 条件情報を加工
plot_df <- plot_df %>%
  mutate(
    Group = sub("\\..*", "", newcond3),
    Time = sub(".*\\.", "", newcond3),
    Group = factor(Group, levels = c("NO", "OB", "POB")),
    Time = factor(Time, levels = c("f", "h")),
    FillColor = case_when(
      Group == "NO" ~ "dodgerblue",
      Group == "OB" ~ "orange",
      Group == "POB" ~ "forestgreen"
    ),
    AlphaVal = ifelse(Time == "f", 0.3, 0.8),
    Cond = newcond3
  )

# 検定対象の条件ペア
comparisons <- list(c("NO.f", "NO.h"), c("OB.f", "OB.h"), c("POB.f", "POB.h"))

# 各ペアでt検定し、p値とアスタリスク記号を取得
pvals <- sapply(comparisons, function(pair) {
  vals1 <- plot_df$Expression[plot_df$Cond == pair[1]]
  vals2 <- plot_df$Expression[plot_df$Cond == pair[2]]
  if (length(vals1) >= 2 && length(vals2) >= 2) {
    t.test(vals1, vals2)$p.value
  } else {
    NA
  }
})

# アスタリスクレベルを決定
stars <- sapply(pvals, function(p) {
  if (is.na(p)) return(NA)
  else if (p < 0.001) return("***")
  else if (p < 0.01) return("**")
  else if (p < 0.05) return("*")
  else return(NA)
})

# 有意な比較のみ抽出
sig_comparisons <- comparisons[!is.na(stars)]
sig_stars <- stars[!is.na(stars)]

# プロット
p <- ggplot(plot_df, aes(x = Cond, y = Expression, fill = FillColor, alpha = AlphaVal)) +
  geom_boxplot(outlier.alpha = 0.2, color = "gray30") +
  geom_jitter(width = 0.2, size = 2) +
  scale_fill_identity() +
  scale_alpha_identity() +
  theme_minimal() +
  labs(
    title = paste0(target_gene, " expression (CPM) across conditions"),
    x = "Condition", y = "CPM (Counts per Million)"
  )

# アスタリスクがあれば追加する
if (length(sig_stars) > 0) {
  p <- p + geom_signif(
    comparisons = sig_comparisons,
    annotations = sig_stars,
    tip_length = 0.01,
    textsize = 5,
    color = "black"
  )
}

# 表示
print(p)

TSS

library(ineq)
library(entropy)
## 
## Attaching package: 'entropy'
## The following object is masked from 'package:ineq':
## 
##     entropy
# 1. 読み込み
exp_raw <- read.table("ExpressionTable_annotated.txt", header = TRUE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE)

# 2. NAを除外して処理
exp_clean <- exp_raw[!is.na(exp_raw[, 1]), ]
rownames(exp_clean) <- make.unique(exp_clean[, 1])
exp_matrix <- apply(exp_clean[, -1], 2, as.numeric)

# 3. ベース遺伝子名を取得(.1, .2 を削除)
base_gene <- sub("\\.\\d+$", "", rownames(exp_clean))

# 4. 各ベース遺伝子ごとにTSSの分布から多様性指標を計算
calc_tss_diversity <- function(expr_mat, base_names) {
  gene_list <- unique(base_names)
  results <- data.frame(Gene = gene_list, NumTSS = NA, Entropy = NA, Gini = NA)

  for (i in seq_along(gene_list)) {
    g <- gene_list[i]
    idx <- which(base_names == g)
    if (length(idx) < 2) next  # 単一TSSは除外 or Entropy=0とする

    # 各TSSの発現量を合計(Across samples)
    tss_expr <- rowSums(expr_mat[idx, , drop = FALSE], na.rm = TRUE)

    # 割合(確率分布)に変換
    tss_frac <- tss_expr / sum(tss_expr)

    # Shannon entropy
    ent <- entropy::entropy(tss_frac, unit = "log2")  # 情報量

    # Gini係数(偏り)
    gini <- ineq::Gini(tss_frac)

    results[i, "NumTSS"] <- length(idx)
    results[i, "Entropy"] <- ent
    results[i, "Gini"] <- gini
  }
  return(na.omit(results))
}

# 実行
diversity_df <- calc_tss_diversity(exp_matrix, base_gene)

# 確認
head(diversity_df)
##           Gene NumTSS   Entropy      Gini
## 1     MTND1P23      4 0.4130105 0.7007458
## 2     MTATP6P1     55 3.8355813 0.7821932
## 4  RP11-206L10      2 0.7723556 0.2732201
## 7         AGRN      2 0.3626989 0.4308619
## 8       TTLL10      2 0.8149603 0.2476636
## 12  RP4-758J18      2 0.8452708 0.2273294
library(ggplot2)

# 1. NumTSS > 40 の遺伝子だけ抽出
many_tss <- diversity_df[diversity_df$NumTSS > 40, ]
# 2. プロット本体
ggplot(diversity_df, aes(x = NumTSS, y = Entropy)) +
  geom_jitter(width = 0.2, alpha = 0.6) +
  geom_smooth(method = "lm") +
  geom_text_repel(data = many_tss, aes(label = Gene), size = 3, color = "red") +
  theme_minimal() +
  labs(
    title = "TSS number and Shannon entropy",
    x = "TSS number",
    y = "Shannon Entropy"
  )
## `geom_smooth()` using formula = 'y ~ x'

# 例2: Gini vs Entropy(相関チェック)
ggplot(diversity_df, aes(x = Gini, y = Entropy)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm") +
  theme_classic() +
  labs(title = "TSS usage bias (Gini) and Entropy", x = "Gini coefficient", y = "Shannon Entropy")
## `geom_smooth()` using formula = 'y ~ x'

# logFCをres_NOから取得(行名=Gene名)
logfc_vec <- res_NO$logFC
names(logfc_vec) <- rownames(res_NO)

# Shannon/Gini用に使うベース遺伝子名と対応
diversity_df$logFC <- logfc_vec[as.character(diversity_df$Gene)]

# フィルター(必要ならNA除去)
plot_df <- na.omit(diversity_df)
# scatter plot
# 発現量(exp_matrix)からベース遺伝子ごとに合計
total_expr <- tapply(rowSums(exp_matrix, na.rm = TRUE), base_gene, sum)
diversity_df$TotalExpression <- total_expr[diversity_df$Gene]

# 散布図(log scale)
library(ggplot2)
ggplot(diversity_df, aes(x = NumTSS, y = log10(TotalExpression + 1))) +
  geom_jitter(width = 0.2, alpha = 0.6) +
  geom_smooth(method = "lm") +
  theme_minimal() +
  labs(title = "TSS number and expression (NO)", x = "TSS number", y = "log10(expression + 1)")
## `geom_smooth()` using formula = 'y ~ x'

shannon entropy

ggplot(plot_df, aes(x = Entropy, y = logFC)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm") +
  theme_classic() +
  labs(title = "Shannon Entropy and fold change (NO)", x = "Shannon Entropy", y = "log2 Fold Change")
## `geom_smooth()` using formula = 'y ~ x'

nothing interesting.

ggplot(plot_df, aes(x = as.factor(NumTSS), y = logFC)) +
  geom_boxplot(outlier.alpha = 0.3) +
  theme_bw() +
  labs(title = "fold change (NO) by TSS number", x = "TSS number", y = "log2 Fold Change")

nothing interesting.

Gini coefficient

plot_df$GiniGroup <- cut(plot_df$Gini, breaks = quantile(plot_df$Gini, probs = c(0, 0.33, 0.67, 1)),
                         labels = c("Low", "Medium", "High"), include.lowest = TRUE)

ggplot(plot_df, aes(x = GiniGroup, y = logFC)) +
  geom_boxplot(outlier.alpha = 0.3) +
  theme_bw() +
  labs(title = "Gini coefficient and fold change (NO)", x = "Gini Group", y = "log2 Fold Change")

no difference.